diff mbox series

[v2,20/28] iomap: Convert iomap_write_begin() and iomap_write_end() to folios

Message ID: 20211108040551.1942823-21-willy@infradead.org (mailing list archive)
State: New, archived
Series: iomap/xfs folio patches

Commit Message

Matthew Wilcox Nov. 8, 2021, 4:05 a.m. UTC
These functions still only work in PAGE_SIZE chunks, but there are
fewer conversions from tail to head pages as a result of this patch.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/iomap/buffered-io.c | 66 ++++++++++++++++++++----------------------
 1 file changed, 31 insertions(+), 35 deletions(-)

Comments

Darrick J. Wong Nov. 17, 2021, 4:31 a.m. UTC | #1
On Mon, Nov 08, 2021 at 04:05:43AM +0000, Matthew Wilcox (Oracle) wrote:
> These functions still only work in PAGE_SIZE chunks, but there are
> fewer conversions from tail to head pages as a result of this patch.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/iomap/buffered-io.c | 66 ++++++++++++++++++++----------------------
>  1 file changed, 31 insertions(+), 35 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 9c61d12028ca..f4ae200adc4c 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c

<snip>

> @@ -741,6 +737,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
>  	long status = 0;
>  
>  	do {
> +		struct folio *folio;
>  		struct page *page;
>  		unsigned long offset;	/* Offset into pagecache page */
>  		unsigned long bytes;	/* Bytes to write to page */
> @@ -764,16 +761,17 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
>  			break;
>  		}
>  
> -		status = iomap_write_begin(iter, pos, bytes, &page);
> +		status = iomap_write_begin(iter, pos, bytes, &folio);
>  		if (unlikely(status))
>  			break;
>  
> +		page = folio_file_page(folio, pos >> PAGE_SHIFT);
>  		if (mapping_writably_mapped(iter->inode->i_mapping))
>  			flush_dcache_page(page);
>  
>  		copied = copy_page_from_iter_atomic(page, offset, bytes, i);

Hrmm.  In principle (or I guess even a subsequent patch), if we had
multi-page folios, could we simply loop the pages in the folio instead
of doing a single page and then calling back into iomap_write_begin to
get (probably) the same folio?

This looks like a fairly straightforward conversion, but I was wondering
about that one little point...

Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D

>  
> -		status = iomap_write_end(iter, pos, bytes, copied, page);
> +		status = iomap_write_end(iter, pos, bytes, copied, folio);
>  
>  		if (unlikely(copied != status))
>  			iov_iter_revert(i, copied - status);
> @@ -839,13 +837,13 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter)
>  	do {
>  		unsigned long offset = offset_in_page(pos);
>  		unsigned long bytes = min_t(loff_t, PAGE_SIZE - offset, length);
> -		struct page *page;
> +		struct folio *folio;
>  
> -		status = iomap_write_begin(iter, pos, bytes, &page);
> +		status = iomap_write_begin(iter, pos, bytes, &folio);
>  		if (unlikely(status))
>  			return status;
>  
> -		status = iomap_write_end(iter, pos, bytes, bytes, page);
> +		status = iomap_write_end(iter, pos, bytes, bytes, folio);
>  		if (WARN_ON_ONCE(status == 0))
>  			return -EIO;
>  
> @@ -882,21 +880,19 @@ EXPORT_SYMBOL_GPL(iomap_file_unshare);
>  static s64 __iomap_zero_iter(struct iomap_iter *iter, loff_t pos, u64 length)
>  {
>  	struct folio *folio;
> -	struct page *page;
>  	int status;
>  	size_t offset, bytes;
>  
> -	status = iomap_write_begin(iter, pos, length, &page);
> +	status = iomap_write_begin(iter, pos, length, &folio);
>  	if (status)
>  		return status;
> -	folio = page_folio(page);
>  
>  	offset = offset_in_folio(folio, pos);
>  	bytes = min_t(u64, folio_size(folio) - offset, length);
>  	folio_zero_range(folio, offset, bytes);
>  	folio_mark_accessed(folio);
>  
> -	return iomap_write_end(iter, pos, bytes, bytes, page);
> +	return iomap_write_end(iter, pos, bytes, bytes, folio);
>  }
>  
>  static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
> -- 
> 2.33.0
>
Matthew Wilcox Nov. 17, 2021, 2:31 p.m. UTC | #2
On Tue, Nov 16, 2021 at 08:31:27PM -0800, Darrick J. Wong wrote:
> > @@ -764,16 +761,17 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
> >  			break;
> >  		}
> >  
> > -		status = iomap_write_begin(iter, pos, bytes, &page);
> > +		status = iomap_write_begin(iter, pos, bytes, &folio);
> >  		if (unlikely(status))
> >  			break;
> >  
> > +		page = folio_file_page(folio, pos >> PAGE_SHIFT);
> >  		if (mapping_writably_mapped(iter->inode->i_mapping))
> >  			flush_dcache_page(page);
> >  
> >  		copied = copy_page_from_iter_atomic(page, offset, bytes, i);
> 
> Hrmm.  In principle (or I guess even a subsequent patch), if we had
> multi-page folios, could we simply loop the pages in the folio instead
> of doing a single page and then calling back into iomap_write_begin to
> get (probably) the same folio?
> 
> This looks like a fairly straightforward conversion, but I was wondering
> about that one little point...

Theoretically, yes, we should be able to do that.  But all of this code
is pretty subtle ("What if we hit a page fault?  What if we're writing
to part of this folio from an mmap of a different part of this folio?
What if it's !Uptodate?  What if we hit this weird ARM super-mprotect
memory tag thing?  What if ...") and, frankly, I got scared.  So I've
left that as future work; someone else can try to wrap their brain around
all of this.
Darrick J. Wong Nov. 17, 2021, 5:10 p.m. UTC | #3
On Wed, Nov 17, 2021 at 02:31:26PM +0000, Matthew Wilcox wrote:
> On Tue, Nov 16, 2021 at 08:31:27PM -0800, Darrick J. Wong wrote:
> > > @@ -764,16 +761,17 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
> > >  			break;
> > >  		}
> > >  
> > > -		status = iomap_write_begin(iter, pos, bytes, &page);
> > > +		status = iomap_write_begin(iter, pos, bytes, &folio);
> > >  		if (unlikely(status))
> > >  			break;
> > >  
> > > +		page = folio_file_page(folio, pos >> PAGE_SHIFT);
> > >  		if (mapping_writably_mapped(iter->inode->i_mapping))
> > >  			flush_dcache_page(page);
> > >  
> > >  		copied = copy_page_from_iter_atomic(page, offset, bytes, i);
> > 
> > Hrmm.  In principle (or I guess even a subsequent patch), if we had
> > multi-page folios, could we simply loop the pages in the folio instead
> > of doing a single page and then calling back into iomap_write_begin to
> > get (probably) the same folio?
> > 
> > This looks like a fairly straightforward conversion, but I was wondering
> > about that one little point...
> 
> Theoretically, yes, we should be able to do that.  But all of this code
> is pretty subtle ("What if we hit a page fault?  What if we're writing
> to part of this folio from an mmap of a different part of this folio?
> What if it's !Uptodate?  What if we hit this weird ARM super-mprotect
> memory tag thing?  What if ...") and, frankly, I got scared.  So I've
> left that as future work; someone else can try to wrap their brain around
> all of this.

<nod> That's roughly the same conclusion I came to -- conceptually we
could keep walking pages until we hit /any/ problem or other difference
with the first page that we don't feel like dealing with, and pass that
count to iomap_end... but no need to try that right this second.

Just checking that I grokked what's going on in this series. :)

--D
diff mbox series

Patch

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 9c61d12028ca..f4ae200adc4c 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -539,9 +539,8 @@  static int iomap_read_folio_sync(loff_t block_start, struct folio *folio,
 }
 
 static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
-		unsigned len, struct page *page)
+		size_t len, struct folio *folio)
 {
-	struct folio *folio = page_folio(page);
 	const struct iomap *srcmap = iomap_iter_srcmap(iter);
 	struct iomap_page *iop = iomap_page_create(iter->inode, folio);
 	loff_t block_size = i_blocksize(iter->inode);
@@ -582,9 +581,8 @@  static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
 }
 
 static int iomap_write_begin_inline(const struct iomap_iter *iter,
-		struct page *page)
+		struct folio *folio)
 {
-	struct folio *folio = page_folio(page);
 	int ret;
 
 	/* needs more work for the tailpacking case; disable for now */
@@ -597,12 +595,12 @@  static int iomap_write_begin_inline(const struct iomap_iter *iter,
 }
 
 static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
-		unsigned len, struct page **pagep)
+		size_t len, struct folio **foliop)
 {
 	const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
 	const struct iomap *srcmap = iomap_iter_srcmap(iter);
-	struct page *page;
 	struct folio *folio;
+	unsigned fgp = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE | FGP_NOFS;
 	int status = 0;
 
 	BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
@@ -618,30 +616,29 @@  static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
 			return status;
 	}
 
-	page = grab_cache_page_write_begin(iter->inode->i_mapping,
-				pos >> PAGE_SHIFT, AOP_FLAG_NOFS);
-	if (!page) {
+	folio = __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT,
+			fgp, mapping_gfp_mask(iter->inode->i_mapping));
+	if (!folio) {
 		status = -ENOMEM;
 		goto out_no_page;
 	}
-	folio = page_folio(page);
 
 	if (srcmap->type == IOMAP_INLINE)
-		status = iomap_write_begin_inline(iter, page);
+		status = iomap_write_begin_inline(iter, folio);
 	else if (srcmap->flags & IOMAP_F_BUFFER_HEAD)
 		status = __block_write_begin_int(folio, pos, len, NULL, srcmap);
 	else
-		status = __iomap_write_begin(iter, pos, len, page);
+		status = __iomap_write_begin(iter, pos, len, folio);
 
 	if (unlikely(status))
 		goto out_unlock;
 
-	*pagep = page;
+	*foliop = folio;
 	return 0;
 
 out_unlock:
-	unlock_page(page);
-	put_page(page);
+	folio_unlock(folio);
+	folio_put(folio);
 	iomap_write_failed(iter->inode, pos, len);
 
 out_no_page:
@@ -651,11 +648,10 @@  static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
 }
 
 static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
-		size_t copied, struct page *page)
+		size_t copied, struct folio *folio)
 {
-	struct folio *folio = page_folio(page);
 	struct iomap_page *iop = to_iomap_page(folio);
-	flush_dcache_page(page);
+	flush_dcache_folio(folio);
 
 	/*
 	 * The blocks that were entirely written will now be uptodate, so we
@@ -668,10 +664,10 @@  static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
 	 * non-uptodate page as a zero-length write, and force the caller to
 	 * redo the whole thing.
 	 */
-	if (unlikely(copied < len && !PageUptodate(page)))
+	if (unlikely(copied < len && !folio_test_uptodate(folio)))
 		return 0;
 	iomap_set_range_uptodate(folio, iop, offset_in_folio(folio, pos), len);
-	__set_page_dirty_nobuffers(page);
+	filemap_dirty_folio(inode->i_mapping, folio);
 	return copied;
 }
 
@@ -695,7 +691,7 @@  static size_t iomap_write_end_inline(const struct iomap_iter *iter,
 
 /* Returns the number of bytes copied.  May be 0.  Cannot be an errno. */
 static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
-		size_t copied, struct page *page)
+		size_t copied, struct folio *folio)
 {
 	const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
 	const struct iomap *srcmap = iomap_iter_srcmap(iter);
@@ -706,9 +702,9 @@  static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
 		ret = iomap_write_end_inline(iter, page, pos, copied);
 	} else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
 		ret = block_write_end(NULL, iter->inode->i_mapping, pos, len,
-				copied, page, NULL);
+				copied, &folio->page, NULL);
 	} else {
-		ret = __iomap_write_end(iter->inode, pos, len, copied, page);
+		ret = __iomap_write_end(iter->inode, pos, len, copied, folio);
 	}
 
 	/*
@@ -720,13 +716,13 @@  static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
 		i_size_write(iter->inode, pos + ret);
 		iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
 	}
-	unlock_page(page);
+	folio_unlock(folio);
 
 	if (old_size < pos)
 		pagecache_isize_extended(iter->inode, old_size, pos);
 	if (page_ops && page_ops->page_done)
-		page_ops->page_done(iter->inode, pos, ret, page);
-	put_page(page);
+		page_ops->page_done(iter->inode, pos, ret, &folio->page);
+	folio_put(folio);
 
 	if (ret < len)
 		iomap_write_failed(iter->inode, pos, len);
@@ -741,6 +737,7 @@  static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 	long status = 0;
 
 	do {
+		struct folio *folio;
 		struct page *page;
 		unsigned long offset;	/* Offset into pagecache page */
 		unsigned long bytes;	/* Bytes to write to page */
@@ -764,16 +761,17 @@  static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 			break;
 		}
 
-		status = iomap_write_begin(iter, pos, bytes, &page);
+		status = iomap_write_begin(iter, pos, bytes, &folio);
 		if (unlikely(status))
 			break;
 
+		page = folio_file_page(folio, pos >> PAGE_SHIFT);
 		if (mapping_writably_mapped(iter->inode->i_mapping))
 			flush_dcache_page(page);
 
 		copied = copy_page_from_iter_atomic(page, offset, bytes, i);
 
-		status = iomap_write_end(iter, pos, bytes, copied, page);
+		status = iomap_write_end(iter, pos, bytes, copied, folio);
 
 		if (unlikely(copied != status))
 			iov_iter_revert(i, copied - status);
@@ -839,13 +837,13 @@  static loff_t iomap_unshare_iter(struct iomap_iter *iter)
 	do {
 		unsigned long offset = offset_in_page(pos);
 		unsigned long bytes = min_t(loff_t, PAGE_SIZE - offset, length);
-		struct page *page;
+		struct folio *folio;
 
-		status = iomap_write_begin(iter, pos, bytes, &page);
+		status = iomap_write_begin(iter, pos, bytes, &folio);
 		if (unlikely(status))
 			return status;
 
-		status = iomap_write_end(iter, pos, bytes, bytes, page);
+		status = iomap_write_end(iter, pos, bytes, bytes, folio);
 		if (WARN_ON_ONCE(status == 0))
 			return -EIO;
 
@@ -882,21 +880,19 @@  EXPORT_SYMBOL_GPL(iomap_file_unshare);
 static s64 __iomap_zero_iter(struct iomap_iter *iter, loff_t pos, u64 length)
 {
 	struct folio *folio;
-	struct page *page;
 	int status;
 	size_t offset, bytes;
 
-	status = iomap_write_begin(iter, pos, length, &page);
+	status = iomap_write_begin(iter, pos, length, &folio);
 	if (status)
 		return status;
-	folio = page_folio(page);
 
 	offset = offset_in_folio(folio, pos);
 	bytes = min_t(u64, folio_size(folio) - offset, length);
 	folio_zero_range(folio, offset, bytes);
 	folio_mark_accessed(folio);
 
-	return iomap_write_end(iter, pos, bytes, bytes, page);
+	return iomap_write_end(iter, pos, bytes, bytes, folio);
 }
 
 static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)