diff mbox series

[v6] iomap: support tail packing inline read

Message ID 20210722031729.51628-1-hsiangkao@linux.alibaba.com (mailing list archive)
State New, archived
Headers show
Series [v6] iomap: support tail packing inline read | expand

Commit Message

Gao Xiang July 22, 2021, 3:17 a.m. UTC
This tries to add tailpacking inline read to iomap, which only supports
page-aligned tailpacking cases of a few extra inline bytes right after
the inode itself, which is how the current EROFS tail-packing works.
Similar to the previous approach, it cleans post-EOF in one iteration.

The write path remains untouched since EROFS cannot be used for testing.
It'd be better to be implemented if upcoming real users care rather than
leave untested dead code around.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Darrick J. Wong <djwong@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Andreas Gruenbacher <andreas.gruenbacher@gmail.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
v5: https://lore.kernel.org/r/20210721082323.41933-1-hsiangkao@linux.alibaba.com
changes since v5 (pointed out by Darrick):
 - Update the code to handle inline data mappings where iomap->offset
   is not zero but the start of the mapping is always page-aligned;

 - introduce a new helper called iomap_inline_buf() to wrap up:
    (iomap->inline_data + pos - iomap->offset)
   since it's used for buffered-io.c and direct-io.c, thus it keeps in
   linux/iomap.h;

 - simplify "size = iomap->length + iomap->offset - pos" since
	if (WARN_ON_ONCE(iomap->length > PAGE_SIZE -
			 offset_in_page(iomap->inline_data)))
		return -EIO;
   already limiting iomap->length within the PAGE_SIZE;

 - update 2 inlined comments;

 - note that I leave "BUG_ON(page_has_private(page));" as-is since
   it was just added in 5.14-rc2 and I'm not sure WARN_ON_ONCE and
   bailing out such cases is really safe for fs developers.

Hopefully I don't miss anything and everyone is happy with this version,
and I will retest this version with looping this 2-level random dir test
this evening:
https://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git/commit/?id=af7de830f86c2f24a510857e413ea7992e699832

Many thanks for your time,
Gao Xiang

 fs/iomap/buffered-io.c | 43 +++++++++++++++++++++++++++++-------------
 fs/iomap/direct-io.c   | 15 +++++++++++----
 include/linux/iomap.h  |  6 ++++++
 3 files changed, 47 insertions(+), 17 deletions(-)

Comments

Christoph Hellwig July 22, 2021, 5:39 a.m. UTC | #1
I think some of the language here is confusing - mostly about tail
packing when we otherwise use inline data.  Can you take a look at
the version below?  This mostly cleans up the terminology, adds a
new helper to check the size, and removes the error on trying to
write with a non-zero pos, as it can be trivially supported now.

---
From 0f9c6ac6c2e372739b29195d25bebb8dd87e583a Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Thu, 22 Jul 2021 11:17:29 +0800
Subject: iomap: make inline data support more flexible

Add support for offsets into the inline data page at iomap->inline_data
to cater for the EROFS tailpackng case where a small data is stored
right after the inode.

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/iomap/buffered-io.c | 35 ++++++++++++++++++-----------------
 fs/iomap/direct-io.c   | 10 ++++++----
 include/linux/iomap.h  | 14 ++++++++++++++
 3 files changed, 38 insertions(+), 21 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 87ccb3438becd9..0597f5c186a33f 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -205,25 +205,29 @@ struct iomap_readpage_ctx {
 	struct readahead_control *rac;
 };
 
-static void
-iomap_read_inline_data(struct inode *inode, struct page *page,
-		struct iomap *iomap)
+static int iomap_read_inline_data(struct inode *inode, struct page *page,
+		struct iomap *iomap, loff_t pos)
 {
-	size_t size = i_size_read(inode);
+	size_t size = iomap->length + iomap->offset - pos;
 	void *addr;
 
 	if (PageUptodate(page))
-		return;
+		return PAGE_SIZE;
 
-	BUG_ON(page_has_private(page));
-	BUG_ON(page->index);
-	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
+	/* inline data must start page aligned in the file */
+	if (WARN_ON_ONCE(offset_in_page(pos)))
+		return -EIO;
+	if (WARN_ON_ONCE(!iomap_inline_data_size_valid(iomap)))
+		return -EIO;
+	if (WARN_ON_ONCE(page_has_private(page)))
+		return -EIO;
 
 	addr = kmap_atomic(page);
-	memcpy(addr, iomap->inline_data, size);
+	memcpy(addr, iomap_inline_buf(iomap, pos), size);
 	memset(addr + size, 0, PAGE_SIZE - size);
 	kunmap_atomic(addr);
 	SetPageUptodate(page);
+	return PAGE_SIZE;
 }
 
 static inline bool iomap_block_needs_zeroing(struct inode *inode,
@@ -246,11 +250,8 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 	unsigned poff, plen;
 	sector_t sector;
 
-	if (iomap->type == IOMAP_INLINE) {
-		WARN_ON_ONCE(pos);
-		iomap_read_inline_data(inode, page, iomap);
-		return PAGE_SIZE;
-	}
+	if (iomap->type == IOMAP_INLINE)
+		return iomap_read_inline_data(inode, page, iomap, pos);
 
 	/* zero post-eof blocks as the page may be mapped */
 	iop = iomap_page_create(inode, page);
@@ -618,14 +619,14 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	}
 
 	if (srcmap->type == IOMAP_INLINE)
-		iomap_read_inline_data(inode, page, srcmap);
+		status = iomap_read_inline_data(inode, page, srcmap, pos);
 	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
 		status = __block_write_begin_int(page, pos, len, NULL, srcmap);
 	else
 		status = __iomap_write_begin(inode, pos, len, flags, page,
 				srcmap);
 
-	if (unlikely(status))
+	if (unlikely(status < 0))
 		goto out_unlock;
 
 	*pagep = page;
@@ -675,7 +676,7 @@ static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
 
 	flush_dcache_page(page);
 	addr = kmap_atomic(page);
-	memcpy(iomap->inline_data + pos, addr + pos, copied);
+	memcpy(iomap_inline_buf(iomap, pos), addr + pos, copied);
 	kunmap_atomic(addr);
 
 	mark_inode_dirty(inode);
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 9398b8c31323b3..a6aaea2764a55f 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -378,23 +378,25 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
 		struct iomap_dio *dio, struct iomap *iomap)
 {
 	struct iov_iter *iter = dio->submit.iter;
+	void *dst = iomap_inline_buf(iomap, pos);
 	size_t copied;
 
-	BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
+	if (WARN_ON_ONCE(!iomap_inline_data_size_valid(iomap)))
+		return -EIO;
 
 	if (dio->flags & IOMAP_DIO_WRITE) {
 		loff_t size = inode->i_size;
 
 		if (pos > size)
-			memset(iomap->inline_data + size, 0, pos - size);
-		copied = copy_from_iter(iomap->inline_data + pos, length, iter);
+			memset(iomap_inline_buf(iomap, size), 0, pos - size);
+		copied = copy_from_iter(dst, length, iter);
 		if (copied) {
 			if (pos + copied > size)
 				i_size_write(inode, pos + copied);
 			mark_inode_dirty(inode);
 		}
 	} else {
-		copied = copy_to_iter(iomap->inline_data + pos, length, iter);
+		copied = copy_to_iter(dst, length, iter);
 	}
 	dio->size += copied;
 	return copied;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 479c1da3e2211e..5efae7153912ed 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -97,6 +97,20 @@ iomap_sector(struct iomap *iomap, loff_t pos)
 	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
 }
 
+static inline void *iomap_inline_buf(const struct iomap *iomap, loff_t pos)
+{
+	return iomap->inline_data - iomap->offset + pos;
+}
+
+/*
+ * iomap->inline_data is a potentially kmapped page, ensure it never crosseѕ a
+ * page boundary.
+ */
+static inline bool iomap_inline_data_size_valid(const struct iomap *iomap)
+{
+	return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data);
+}
+
 /*
  * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
  * and page_done will be called for each page written to.  This only applies to
Gao Xiang July 22, 2021, 5:56 a.m. UTC | #2
Hi Christoph,

On Thu, Jul 22, 2021 at 07:39:47AM +0200, Christoph Hellwig wrote:
> I think some of the language here is confusing - mostly about tail
> packing when we otherwise use inline data.  Can you take a look at
> the version below?  This mostly cleans up the terminology, adds a
> new helper to check the size, and removes the error on trying to
> write with a non-zero pos, as it can be trivially supported now.
> 

Many thanks for your time and hard work on revising this again. I'm
fine with this version and the update for iomap_write_begin(), and
I will do test hours later as I said before.

Hopefully this version could be confirmed by Andreas on the gfs2
side as well.

Thanks,
Gao Xiang

> ---
> From 0f9c6ac6c2e372739b29195d25bebb8dd87e583a Mon Sep 17 00:00:00 2001
> From: Gao Xiang <hsiangkao@linux.alibaba.com>
> Date: Thu, 22 Jul 2021 11:17:29 +0800
> Subject: iomap: make inline data support more flexible
> 
> Add support for offsets into the inline data page at iomap->inline_data
> to cater for the EROFS tailpackng case where a small data is stored
> right after the inode.
> 
> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> ---
>  fs/iomap/buffered-io.c | 35 ++++++++++++++++++-----------------
>  fs/iomap/direct-io.c   | 10 ++++++----
>  include/linux/iomap.h  | 14 ++++++++++++++
>  3 files changed, 38 insertions(+), 21 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 87ccb3438becd9..0597f5c186a33f 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -205,25 +205,29 @@ struct iomap_readpage_ctx {
>  	struct readahead_control *rac;
>  };
>  
> -static void
> -iomap_read_inline_data(struct inode *inode, struct page *page,
> -		struct iomap *iomap)
> +static int iomap_read_inline_data(struct inode *inode, struct page *page,
> +		struct iomap *iomap, loff_t pos)
>  {
> -	size_t size = i_size_read(inode);
> +	size_t size = iomap->length + iomap->offset - pos;
>  	void *addr;
>  
>  	if (PageUptodate(page))
> -		return;
> +		return PAGE_SIZE;
>  
> -	BUG_ON(page_has_private(page));
> -	BUG_ON(page->index);
> -	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
> +	/* inline data must start page aligned in the file */
> +	if (WARN_ON_ONCE(offset_in_page(pos)))
> +		return -EIO;
> +	if (WARN_ON_ONCE(!iomap_inline_data_size_valid(iomap)))
> +		return -EIO;
> +	if (WARN_ON_ONCE(page_has_private(page)))
> +		return -EIO;
>  
>  	addr = kmap_atomic(page);
> -	memcpy(addr, iomap->inline_data, size);
> +	memcpy(addr, iomap_inline_buf(iomap, pos), size);
>  	memset(addr + size, 0, PAGE_SIZE - size);
>  	kunmap_atomic(addr);
>  	SetPageUptodate(page);
> +	return PAGE_SIZE;
>  }
>  
>  static inline bool iomap_block_needs_zeroing(struct inode *inode,
> @@ -246,11 +250,8 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  	unsigned poff, plen;
>  	sector_t sector;
>  
> -	if (iomap->type == IOMAP_INLINE) {
> -		WARN_ON_ONCE(pos);
> -		iomap_read_inline_data(inode, page, iomap);
> -		return PAGE_SIZE;
> -	}
> +	if (iomap->type == IOMAP_INLINE)
> +		return iomap_read_inline_data(inode, page, iomap, pos);
>  
>  	/* zero post-eof blocks as the page may be mapped */
>  	iop = iomap_page_create(inode, page);
> @@ -618,14 +619,14 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>  	}
>  
>  	if (srcmap->type == IOMAP_INLINE)
> -		iomap_read_inline_data(inode, page, srcmap);
> +		status = iomap_read_inline_data(inode, page, srcmap, pos);
>  	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
>  		status = __block_write_begin_int(page, pos, len, NULL, srcmap);
>  	else
>  		status = __iomap_write_begin(inode, pos, len, flags, page,
>  				srcmap);
>  
> -	if (unlikely(status))
> +	if (unlikely(status < 0))
>  		goto out_unlock;
>  
>  	*pagep = page;
> @@ -675,7 +676,7 @@ static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
>  
>  	flush_dcache_page(page);
>  	addr = kmap_atomic(page);
> -	memcpy(iomap->inline_data + pos, addr + pos, copied);
> +	memcpy(iomap_inline_buf(iomap, pos), addr + pos, copied);
>  	kunmap_atomic(addr);
>  
>  	mark_inode_dirty(inode);
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index 9398b8c31323b3..a6aaea2764a55f 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -378,23 +378,25 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
>  		struct iomap_dio *dio, struct iomap *iomap)
>  {
>  	struct iov_iter *iter = dio->submit.iter;
> +	void *dst = iomap_inline_buf(iomap, pos);
>  	size_t copied;
>  
> -	BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
> +	if (WARN_ON_ONCE(!iomap_inline_data_size_valid(iomap)))
> +		return -EIO;
>  
>  	if (dio->flags & IOMAP_DIO_WRITE) {
>  		loff_t size = inode->i_size;
>  
>  		if (pos > size)
> -			memset(iomap->inline_data + size, 0, pos - size);
> -		copied = copy_from_iter(iomap->inline_data + pos, length, iter);
> +			memset(iomap_inline_buf(iomap, size), 0, pos - size);
> +		copied = copy_from_iter(dst, length, iter);
>  		if (copied) {
>  			if (pos + copied > size)
>  				i_size_write(inode, pos + copied);
>  			mark_inode_dirty(inode);
>  		}
>  	} else {
> -		copied = copy_to_iter(iomap->inline_data + pos, length, iter);
> +		copied = copy_to_iter(dst, length, iter);
>  	}
>  	dio->size += copied;
>  	return copied;
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 479c1da3e2211e..5efae7153912ed 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -97,6 +97,20 @@ iomap_sector(struct iomap *iomap, loff_t pos)
>  	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
>  }
>  
> +static inline void *iomap_inline_buf(const struct iomap *iomap, loff_t pos)
> +{
> +	return iomap->inline_data - iomap->offset + pos;
> +}
> +
> +/*
> + * iomap->inline_data is a potentially kmapped page, ensure it never crosseѕ a
> + * page boundary.
> + */
> +static inline bool iomap_inline_data_size_valid(const struct iomap *iomap)
> +{
> +	return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data);
> +}
> +
>  /*
>   * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
>   * and page_done will be called for each page written to.  This only applies to
> -- 
> 2.30.2
Darrick J. Wong July 22, 2021, 4:51 p.m. UTC | #3
On Thu, Jul 22, 2021 at 07:39:47AM +0200, Christoph Hellwig wrote:
> I think some of the language here is confusing - mostly about tail
> packing when we otherwise use inline data.  Can you take a look at
> the version below?  This mostly cleans up the terminology, adds a
> new helper to check the size, and removes the error on trying to
> write with a non-zero pos, as it can be trivially supported now.
> 
> ---
> From 0f9c6ac6c2e372739b29195d25bebb8dd87e583a Mon Sep 17 00:00:00 2001
> From: Gao Xiang <hsiangkao@linux.alibaba.com>
> Date: Thu, 22 Jul 2021 11:17:29 +0800
> Subject: iomap: make inline data support more flexible
> 
> Add support for offsets into the inline data page at iomap->inline_data
> to cater for the EROFS tailpackng case where a small data is stored
> right after the inode.

The commit message is a little misleading -- this adds support for
inline data pages at nonzero (but page-aligned) file offsets, not file
offsets into the page itself.  I suggest:

"Add support for reading inline data content into the page cache from
nonzero page-aligned file offsets.  This enables the EROFS tailpacking
mode where the last few bytes of the file are stored right after the
inode."

The code changes look good to me.

--D

> 
> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> ---
>  fs/iomap/buffered-io.c | 35 ++++++++++++++++++-----------------
>  fs/iomap/direct-io.c   | 10 ++++++----
>  include/linux/iomap.h  | 14 ++++++++++++++
>  3 files changed, 38 insertions(+), 21 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 87ccb3438becd9..0597f5c186a33f 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -205,25 +205,29 @@ struct iomap_readpage_ctx {
>  	struct readahead_control *rac;
>  };
>  
> -static void
> -iomap_read_inline_data(struct inode *inode, struct page *page,
> -		struct iomap *iomap)
> +static int iomap_read_inline_data(struct inode *inode, struct page *page,
> +		struct iomap *iomap, loff_t pos)
>  {
> -	size_t size = i_size_read(inode);
> +	size_t size = iomap->length + iomap->offset - pos;
>  	void *addr;
>  
>  	if (PageUptodate(page))
> -		return;
> +		return PAGE_SIZE;
>  
> -	BUG_ON(page_has_private(page));
> -	BUG_ON(page->index);
> -	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
> +	/* inline data must start page aligned in the file */
> +	if (WARN_ON_ONCE(offset_in_page(pos)))
> +		return -EIO;
> +	if (WARN_ON_ONCE(!iomap_inline_data_size_valid(iomap)))
> +		return -EIO;
> +	if (WARN_ON_ONCE(page_has_private(page)))
> +		return -EIO;
>  
>  	addr = kmap_atomic(page);
> -	memcpy(addr, iomap->inline_data, size);
> +	memcpy(addr, iomap_inline_buf(iomap, pos), size);
>  	memset(addr + size, 0, PAGE_SIZE - size);
>  	kunmap_atomic(addr);
>  	SetPageUptodate(page);
> +	return PAGE_SIZE;
>  }
>  
>  static inline bool iomap_block_needs_zeroing(struct inode *inode,
> @@ -246,11 +250,8 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  	unsigned poff, plen;
>  	sector_t sector;
>  
> -	if (iomap->type == IOMAP_INLINE) {
> -		WARN_ON_ONCE(pos);
> -		iomap_read_inline_data(inode, page, iomap);
> -		return PAGE_SIZE;
> -	}
> +	if (iomap->type == IOMAP_INLINE)
> +		return iomap_read_inline_data(inode, page, iomap, pos);
>  
>  	/* zero post-eof blocks as the page may be mapped */
>  	iop = iomap_page_create(inode, page);
> @@ -618,14 +619,14 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>  	}
>  
>  	if (srcmap->type == IOMAP_INLINE)
> -		iomap_read_inline_data(inode, page, srcmap);
> +		status = iomap_read_inline_data(inode, page, srcmap, pos);
>  	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
>  		status = __block_write_begin_int(page, pos, len, NULL, srcmap);
>  	else
>  		status = __iomap_write_begin(inode, pos, len, flags, page,
>  				srcmap);
>  
> -	if (unlikely(status))
> +	if (unlikely(status < 0))
>  		goto out_unlock;
>  
>  	*pagep = page;
> @@ -675,7 +676,7 @@ static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
>  
>  	flush_dcache_page(page);
>  	addr = kmap_atomic(page);
> -	memcpy(iomap->inline_data + pos, addr + pos, copied);
> +	memcpy(iomap_inline_buf(iomap, pos), addr + pos, copied);
>  	kunmap_atomic(addr);
>  
>  	mark_inode_dirty(inode);
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index 9398b8c31323b3..a6aaea2764a55f 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -378,23 +378,25 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
>  		struct iomap_dio *dio, struct iomap *iomap)
>  {
>  	struct iov_iter *iter = dio->submit.iter;
> +	void *dst = iomap_inline_buf(iomap, pos);
>  	size_t copied;
>  
> -	BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
> +	if (WARN_ON_ONCE(!iomap_inline_data_size_valid(iomap)))
> +		return -EIO;
>  
>  	if (dio->flags & IOMAP_DIO_WRITE) {
>  		loff_t size = inode->i_size;
>  
>  		if (pos > size)
> -			memset(iomap->inline_data + size, 0, pos - size);
> -		copied = copy_from_iter(iomap->inline_data + pos, length, iter);
> +			memset(iomap_inline_buf(iomap, size), 0, pos - size);
> +		copied = copy_from_iter(dst, length, iter);
>  		if (copied) {
>  			if (pos + copied > size)
>  				i_size_write(inode, pos + copied);
>  			mark_inode_dirty(inode);
>  		}
>  	} else {
> -		copied = copy_to_iter(iomap->inline_data + pos, length, iter);
> +		copied = copy_to_iter(dst, length, iter);
>  	}
>  	dio->size += copied;
>  	return copied;
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 479c1da3e2211e..5efae7153912ed 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -97,6 +97,20 @@ iomap_sector(struct iomap *iomap, loff_t pos)
>  	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
>  }
>  
> +static inline void *iomap_inline_buf(const struct iomap *iomap, loff_t pos)
> +{
> +	return iomap->inline_data - iomap->offset + pos;
> +}
> +
> +/*
> + * iomap->inline_data is a potentially kmapped page, ensure it never crosseѕ a
> + * page boundary.
> + */
> +static inline bool iomap_inline_data_size_valid(const struct iomap *iomap)
> +{
> +	return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data);
> +}
> +
>  /*
>   * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
>   * and page_done will be called for each page written to.  This only applies to
> -- 
> 2.30.2
>
Christoph Hellwig July 22, 2021, 4:53 p.m. UTC | #4
On Thu, Jul 22, 2021 at 09:51:09AM -0700, Darrick J. Wong wrote:
> The commit message is a little misleading -- this adds support for
> inline data pages at nonzero (but page-aligned) file offsets, not file
> offsets into the page itself.  I suggest:

It actually adds both.  pos is the offset into the file.
Matthew Wilcox July 22, 2021, 4:57 p.m. UTC | #5
On Thu, Jul 22, 2021 at 06:53:42PM +0200, Christoph Hellwig wrote:
> On Thu, Jul 22, 2021 at 09:51:09AM -0700, Darrick J. Wong wrote:
> > The commit message is a little misleading -- this adds support for
> > inline data pages at nonzero (but page-aligned) file offsets, not file
> > offsets into the page itself.  I suggest:
> 
> It actually adds both.  pos is the offset into the file.

If you want to add support for both, then you need to call
iomap_set_range_uptodate() instead of SetPageUptodate().  Otherwise,
you can set the page uptodate before submitting the bio for the first
half of the page.
Gao Xiang July 23, 2021, 1:26 a.m. UTC | #6
Hi Darrick,

On Thu, Jul 22, 2021 at 09:51:09AM -0700, Darrick J. Wong wrote:
> On Thu, Jul 22, 2021 at 07:39:47AM +0200, Christoph Hellwig wrote:
> > I think some of the language here is confusing - mostly about tail
> > packing when we otherwise use inline data.  Can you take a look at
> > the version below?  This mostly cleans up the terminology, adds a
> > new helper to check the size, and removes the error on trying to
> > write with a non-zero pos, as it can be trivially supported now.
> > 
> > ---
> > From 0f9c6ac6c2e372739b29195d25bebb8dd87e583a Mon Sep 17 00:00:00 2001
> > From: Gao Xiang <hsiangkao@linux.alibaba.com>
> > Date: Thu, 22 Jul 2021 11:17:29 +0800
> > Subject: iomap: make inline data support more flexible
> > 
> > Add support for offsets into the inline data page at iomap->inline_data
> > to cater for the EROFS tailpackng case where a small data is stored
> > right after the inode.
> 
> The commit message is a little misleading -- this adds support for
> inline data pages at nonzero (but page-aligned) file offsets, not file
> offsets into the page itself.  I suggest:
> 
> "Add support for reading inline data content into the page cache from
> nonzero page-aligned file offsets.  This enables the EROFS tailpacking
> mode where the last few bytes of the file are stored right after the
> inode."
> 
> The code changes look good to me.

Thanks for your suggestion. I've tested EROFS with no problem so far. 

I could update the commit message like this, what should I do next?

Many thanks,
Gao Xiang

> 
> --D
> 
> > 
> > Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
> > ---
> >  fs/iomap/buffered-io.c | 35 ++++++++++++++++++-----------------
> >  fs/iomap/direct-io.c   | 10 ++++++----
> >  include/linux/iomap.h  | 14 ++++++++++++++
> >  3 files changed, 38 insertions(+), 21 deletions(-)
> > 
> > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> > index 87ccb3438becd9..0597f5c186a33f 100644
> > --- a/fs/iomap/buffered-io.c
> > +++ b/fs/iomap/buffered-io.c
> > @@ -205,25 +205,29 @@ struct iomap_readpage_ctx {
> >  	struct readahead_control *rac;
> >  };
> >  
> > -static void
> > -iomap_read_inline_data(struct inode *inode, struct page *page,
> > -		struct iomap *iomap)
> > +static int iomap_read_inline_data(struct inode *inode, struct page *page,
> > +		struct iomap *iomap, loff_t pos)
> >  {
> > -	size_t size = i_size_read(inode);
> > +	size_t size = iomap->length + iomap->offset - pos;
> >  	void *addr;
> >  
> >  	if (PageUptodate(page))
> > -		return;
> > +		return PAGE_SIZE;
> >  
> > -	BUG_ON(page_has_private(page));
> > -	BUG_ON(page->index);
> > -	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
> > +	/* inline data must start page aligned in the file */
> > +	if (WARN_ON_ONCE(offset_in_page(pos)))
> > +		return -EIO;
> > +	if (WARN_ON_ONCE(!iomap_inline_data_size_valid(iomap)))
> > +		return -EIO;
> > +	if (WARN_ON_ONCE(page_has_private(page)))
> > +		return -EIO;
> >  
> >  	addr = kmap_atomic(page);
> > -	memcpy(addr, iomap->inline_data, size);
> > +	memcpy(addr, iomap_inline_buf(iomap, pos), size);
> >  	memset(addr + size, 0, PAGE_SIZE - size);
> >  	kunmap_atomic(addr);
> >  	SetPageUptodate(page);
> > +	return PAGE_SIZE;
> >  }
> >  
> >  static inline bool iomap_block_needs_zeroing(struct inode *inode,
> > @@ -246,11 +250,8 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
> >  	unsigned poff, plen;
> >  	sector_t sector;
> >  
> > -	if (iomap->type == IOMAP_INLINE) {
> > -		WARN_ON_ONCE(pos);
> > -		iomap_read_inline_data(inode, page, iomap);
> > -		return PAGE_SIZE;
> > -	}
> > +	if (iomap->type == IOMAP_INLINE)
> > +		return iomap_read_inline_data(inode, page, iomap, pos);
> >  
> >  	/* zero post-eof blocks as the page may be mapped */
> >  	iop = iomap_page_create(inode, page);
> > @@ -618,14 +619,14 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
> >  	}
> >  
> >  	if (srcmap->type == IOMAP_INLINE)
> > -		iomap_read_inline_data(inode, page, srcmap);
> > +		status = iomap_read_inline_data(inode, page, srcmap, pos);
> >  	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
> >  		status = __block_write_begin_int(page, pos, len, NULL, srcmap);
> >  	else
> >  		status = __iomap_write_begin(inode, pos, len, flags, page,
> >  				srcmap);
> >  
> > -	if (unlikely(status))
> > +	if (unlikely(status < 0))
> >  		goto out_unlock;
> >  
> >  	*pagep = page;
> > @@ -675,7 +676,7 @@ static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
> >  
> >  	flush_dcache_page(page);
> >  	addr = kmap_atomic(page);
> > -	memcpy(iomap->inline_data + pos, addr + pos, copied);
> > +	memcpy(iomap_inline_buf(iomap, pos), addr + pos, copied);
> >  	kunmap_atomic(addr);
> >  
> >  	mark_inode_dirty(inode);
> > diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> > index 9398b8c31323b3..a6aaea2764a55f 100644
> > --- a/fs/iomap/direct-io.c
> > +++ b/fs/iomap/direct-io.c
> > @@ -378,23 +378,25 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
> >  		struct iomap_dio *dio, struct iomap *iomap)
> >  {
> >  	struct iov_iter *iter = dio->submit.iter;
> > +	void *dst = iomap_inline_buf(iomap, pos);
> >  	size_t copied;
> >  
> > -	BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
> > +	if (WARN_ON_ONCE(!iomap_inline_data_size_valid(iomap)))
> > +		return -EIO;
> >  
> >  	if (dio->flags & IOMAP_DIO_WRITE) {
> >  		loff_t size = inode->i_size;
> >  
> >  		if (pos > size)
> > -			memset(iomap->inline_data + size, 0, pos - size);
> > -		copied = copy_from_iter(iomap->inline_data + pos, length, iter);
> > +			memset(iomap_inline_buf(iomap, size), 0, pos - size);
> > +		copied = copy_from_iter(dst, length, iter);
> >  		if (copied) {
> >  			if (pos + copied > size)
> >  				i_size_write(inode, pos + copied);
> >  			mark_inode_dirty(inode);
> >  		}
> >  	} else {
> > -		copied = copy_to_iter(iomap->inline_data + pos, length, iter);
> > +		copied = copy_to_iter(dst, length, iter);
> >  	}
> >  	dio->size += copied;
> >  	return copied;
> > diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> > index 479c1da3e2211e..5efae7153912ed 100644
> > --- a/include/linux/iomap.h
> > +++ b/include/linux/iomap.h
> > @@ -97,6 +97,20 @@ iomap_sector(struct iomap *iomap, loff_t pos)
> >  	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
> >  }
> >  
> > +static inline void *iomap_inline_buf(const struct iomap *iomap, loff_t pos)
> > +{
> > +	return iomap->inline_data - iomap->offset + pos;
> > +}
> > +
> > +/*
> > + * iomap->inline_data is a potentially kmapped page, ensure it never crosseѕ a
> > + * page boundary.
> > + */
> > +static inline bool iomap_inline_data_size_valid(const struct iomap *iomap)
> > +{
> > +	return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data);
> > +}
> > +
> >  /*
> >   * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
> >   * and page_done will be called for each page written to.  This only applies to
> > -- 
> > 2.30.2
> >
Huang Jianan July 23, 2021, 6:22 a.m. UTC | #7
Hi Xiang,

I have tested this patch based on the erofs dax support below, It works 
well for me.

https://git.kernel.org/pub/scm/linux/kernel/git/xiang/linux.git/log/?h=erofs/dax

Tested-by: Huang Jianan <huangjianan@oppo.com>

Thanks,

Jianan

On 2021/7/23 9:26, Gao Xiang wrote:
> Hi Darrick,
>
> On Thu, Jul 22, 2021 at 09:51:09AM -0700, Darrick J. Wong wrote:
>> On Thu, Jul 22, 2021 at 07:39:47AM +0200, Christoph Hellwig wrote:
>>> I think some of the language here is confusing - mostly about tail
>>> packing when we otherwise use inline data.  Can you take a look at
>>> the version below?  This mostly cleans up the terminology, adds a
>>> new helper to check the size, and removes the error on trying to
>>> write with a non-zero pos, as it can be trivially supported now.
>>>
>>> ---
>>>  From 0f9c6ac6c2e372739b29195d25bebb8dd87e583a Mon Sep 17 00:00:00 2001
>>> From: Gao Xiang <hsiangkao@linux.alibaba.com>
>>> Date: Thu, 22 Jul 2021 11:17:29 +0800
>>> Subject: iomap: make inline data support more flexible
>>>
>>> Add support for offsets into the inline data page at iomap->inline_data
>>> to cater for the EROFS tailpackng case where a small data is stored
>>> right after the inode.
>> The commit message is a little misleading -- this adds support for
>> inline data pages at nonzero (but page-aligned) file offsets, not file
>> offsets into the page itself.  I suggest:
>>
>> "Add support for reading inline data content into the page cache from
>> nonzero page-aligned file offsets.  This enables the EROFS tailpacking
>> mode where the last few bytes of the file are stored right after the
>> inode."
>>
>> The code changes look good to me.
> Thanks for your suggestion. I've tested EROFS with no problem so far.
>
> I could update the commit message like this, what should I do next?
>
> Many thanks,
> Gao Xiang
>
>> --D
>>
>>> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
>>> ---
>>>   fs/iomap/buffered-io.c | 35 ++++++++++++++++++-----------------
>>>   fs/iomap/direct-io.c   | 10 ++++++----
>>>   include/linux/iomap.h  | 14 ++++++++++++++
>>>   3 files changed, 38 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
>>> index 87ccb3438becd9..0597f5c186a33f 100644
>>> --- a/fs/iomap/buffered-io.c
>>> +++ b/fs/iomap/buffered-io.c
>>> @@ -205,25 +205,29 @@ struct iomap_readpage_ctx {
>>>   	struct readahead_control *rac;
>>>   };
>>>   
>>> -static void
>>> -iomap_read_inline_data(struct inode *inode, struct page *page,
>>> -		struct iomap *iomap)
>>> +static int iomap_read_inline_data(struct inode *inode, struct page *page,
>>> +		struct iomap *iomap, loff_t pos)
>>>   {
>>> -	size_t size = i_size_read(inode);
>>> +	size_t size = iomap->length + iomap->offset - pos;
>>>   	void *addr;
>>>   
>>>   	if (PageUptodate(page))
>>> -		return;
>>> +		return PAGE_SIZE;
>>>   
>>> -	BUG_ON(page_has_private(page));
>>> -	BUG_ON(page->index);
>>> -	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
>>> +	/* inline data must start page aligned in the file */
>>> +	if (WARN_ON_ONCE(offset_in_page(pos)))
>>> +		return -EIO;
>>> +	if (WARN_ON_ONCE(!iomap_inline_data_size_valid(iomap)))
>>> +		return -EIO;
>>> +	if (WARN_ON_ONCE(page_has_private(page)))
>>> +		return -EIO;
>>>   
>>>   	addr = kmap_atomic(page);
>>> -	memcpy(addr, iomap->inline_data, size);
>>> +	memcpy(addr, iomap_inline_buf(iomap, pos), size);
>>>   	memset(addr + size, 0, PAGE_SIZE - size);
>>>   	kunmap_atomic(addr);
>>>   	SetPageUptodate(page);
>>> +	return PAGE_SIZE;
>>>   }
>>>   
>>>   static inline bool iomap_block_needs_zeroing(struct inode *inode,
>>> @@ -246,11 +250,8 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>>>   	unsigned poff, plen;
>>>   	sector_t sector;
>>>   
>>> -	if (iomap->type == IOMAP_INLINE) {
>>> -		WARN_ON_ONCE(pos);
>>> -		iomap_read_inline_data(inode, page, iomap);
>>> -		return PAGE_SIZE;
>>> -	}
>>> +	if (iomap->type == IOMAP_INLINE)
>>> +		return iomap_read_inline_data(inode, page, iomap, pos);
>>>   
>>>   	/* zero post-eof blocks as the page may be mapped */
>>>   	iop = iomap_page_create(inode, page);
>>> @@ -618,14 +619,14 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>>>   	}
>>>   
>>>   	if (srcmap->type == IOMAP_INLINE)
>>> -		iomap_read_inline_data(inode, page, srcmap);
>>> +		status = iomap_read_inline_data(inode, page, srcmap, pos);
>>>   	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
>>>   		status = __block_write_begin_int(page, pos, len, NULL, srcmap);
>>>   	else
>>>   		status = __iomap_write_begin(inode, pos, len, flags, page,
>>>   				srcmap);
>>>   
>>> -	if (unlikely(status))
>>> +	if (unlikely(status < 0))
>>>   		goto out_unlock;
>>>   
>>>   	*pagep = page;
>>> @@ -675,7 +676,7 @@ static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
>>>   
>>>   	flush_dcache_page(page);
>>>   	addr = kmap_atomic(page);
>>> -	memcpy(iomap->inline_data + pos, addr + pos, copied);
>>> +	memcpy(iomap_inline_buf(iomap, pos), addr + pos, copied);
>>>   	kunmap_atomic(addr);
>>>   
>>>   	mark_inode_dirty(inode);
>>> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
>>> index 9398b8c31323b3..a6aaea2764a55f 100644
>>> --- a/fs/iomap/direct-io.c
>>> +++ b/fs/iomap/direct-io.c
>>> @@ -378,23 +378,25 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
>>>   		struct iomap_dio *dio, struct iomap *iomap)
>>>   {
>>>   	struct iov_iter *iter = dio->submit.iter;
>>> +	void *dst = iomap_inline_buf(iomap, pos);
>>>   	size_t copied;
>>>   
>>> -	BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
>>> +	if (WARN_ON_ONCE(!iomap_inline_data_size_valid(iomap)))
>>> +		return -EIO;
>>>   
>>>   	if (dio->flags & IOMAP_DIO_WRITE) {
>>>   		loff_t size = inode->i_size;
>>>   
>>>   		if (pos > size)
>>> -			memset(iomap->inline_data + size, 0, pos - size);
>>> -		copied = copy_from_iter(iomap->inline_data + pos, length, iter);
>>> +			memset(iomap_inline_buf(iomap, size), 0, pos - size);
>>> +		copied = copy_from_iter(dst, length, iter);
>>>   		if (copied) {
>>>   			if (pos + copied > size)
>>>   				i_size_write(inode, pos + copied);
>>>   			mark_inode_dirty(inode);
>>>   		}
>>>   	} else {
>>> -		copied = copy_to_iter(iomap->inline_data + pos, length, iter);
>>> +		copied = copy_to_iter(dst, length, iter);
>>>   	}
>>>   	dio->size += copied;
>>>   	return copied;
>>> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
>>> index 479c1da3e2211e..5efae7153912ed 100644
>>> --- a/include/linux/iomap.h
>>> +++ b/include/linux/iomap.h
>>> @@ -97,6 +97,20 @@ iomap_sector(struct iomap *iomap, loff_t pos)
>>>   	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
>>>   }
>>>   
>>> +static inline void *iomap_inline_buf(const struct iomap *iomap, loff_t pos)
>>> +{
>>> +	return iomap->inline_data - iomap->offset + pos;
>>> +}
>>> +
>>> +/*
>>> + * iomap->inline_data is a potentially kmapped page, ensure it never crosseѕ a
>>> + * page boundary.
>>> + */
>>> +static inline bool iomap_inline_data_size_valid(const struct iomap *iomap)
>>> +{
>>> +	return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data);
>>> +}
>>> +
>>>   /*
>>>    * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
>>>    * and page_done will be called for each page written to.  This only applies to
>>> -- 
>>> 2.30.2
>>>
Matthew Wilcox July 23, 2021, 3:05 p.m. UTC | #8
On Thu, Jul 22, 2021 at 07:39:47AM +0200, Christoph Hellwig wrote:
> @@ -675,7 +676,7 @@ static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
>  
>  	flush_dcache_page(page);
>  	addr = kmap_atomic(page);
> -	memcpy(iomap->inline_data + pos, addr + pos, copied);
> +	memcpy(iomap_inline_buf(iomap, pos), addr + pos, copied);

This is wrong; pos can be > PAGE_SIZE, so this needs to be
addr + offset_in_page(pos).
Gao Xiang July 23, 2021, 3:23 p.m. UTC | #9
Hi Matthew,

On Fri, Jul 23, 2021 at 04:05:29PM +0100, Matthew Wilcox wrote:
> On Thu, Jul 22, 2021 at 07:39:47AM +0200, Christoph Hellwig wrote:
> > @@ -675,7 +676,7 @@ static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
> >  
> >  	flush_dcache_page(page);
> >  	addr = kmap_atomic(page);
> > -	memcpy(iomap->inline_data + pos, addr + pos, copied);
> > +	memcpy(iomap_inline_buf(iomap, pos), addr + pos, copied);
> 
> This is wrong; pos can be > PAGE_SIZE, so this needs to be
> addr + offset_in_page(pos).

Yeah, thanks for pointing out. It seems so, since EROFS cannot test
such write path, previously it was disabled explicitly. I could
update it in the next version as above.

Thanks,
Gao Xiang
Matthew Wilcox July 23, 2021, 3:56 p.m. UTC | #10
On Fri, Jul 23, 2021 at 11:23:38PM +0800, Gao Xiang wrote:
> Hi Matthew,
> 
> On Fri, Jul 23, 2021 at 04:05:29PM +0100, Matthew Wilcox wrote:
> > On Thu, Jul 22, 2021 at 07:39:47AM +0200, Christoph Hellwig wrote:
> > > @@ -675,7 +676,7 @@ static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
> > >  
> > >  	flush_dcache_page(page);
> > >  	addr = kmap_atomic(page);
> > > -	memcpy(iomap->inline_data + pos, addr + pos, copied);
> > > +	memcpy(iomap_inline_buf(iomap, pos), addr + pos, copied);
> > 
> > This is wrong; pos can be > PAGE_SIZE, so this needs to be
> > addr + offset_in_page(pos).
> 
> Yeah, thanks for pointing out. It seems so, since EROFS cannot test
> such write path, previously it was disabled explicitly. I could
> update it in the next version as above.

We're also missing a call to __set_page_dirty_nobuffers().  This
matters to nobody right now -- erofs is read-only and gfs2 only
supports inline data in the inode.  I presume what is happening
for gfs2 is that at inode writeback time, it copies the ~60 bytes
from the page cache into the inode and then schedules the inode
for writeback.

But logically, we should mark the page as dirty.  It'll be marked
as dirty by ->mkwrite, should the page be mmaped, so gfs2 must
already cope with a dirty page for inline data.
Gao Xiang July 23, 2021, 4:24 p.m. UTC | #11
On Fri, Jul 23, 2021 at 04:56:35PM +0100, Matthew Wilcox wrote:
> On Fri, Jul 23, 2021 at 11:23:38PM +0800, Gao Xiang wrote:
> > Hi Matthew,
> > 
> > On Fri, Jul 23, 2021 at 04:05:29PM +0100, Matthew Wilcox wrote:
> > > On Thu, Jul 22, 2021 at 07:39:47AM +0200, Christoph Hellwig wrote:
> > > > @@ -675,7 +676,7 @@ static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
> > > >  
> > > >  	flush_dcache_page(page);
> > > >  	addr = kmap_atomic(page);
> > > > -	memcpy(iomap->inline_data + pos, addr + pos, copied);
> > > > +	memcpy(iomap_inline_buf(iomap, pos), addr + pos, copied);
> > > 
> > > This is wrong; pos can be > PAGE_SIZE, so this needs to be
> > > addr + offset_in_page(pos).
> > 
> > Yeah, thanks for pointing out. It seems so, since EROFS cannot test
> > such write path, previously it was disabled explicitly. I could
> > update it in the next version as above.
> 
> We're also missing a call to __set_page_dirty_nobuffers().  This
> matters to nobody right now -- erofs is read-only and gfs2 only
> supports inline data in the inode.  I presume what is happening
> for gfs2 is that at inode writeback time, it copies the ~60 bytes
> from the page cache into the inode and then schedules the inode
> for writeback.
> 
> But logically, we should mark the page as dirty.  It'll be marked
> as dirty by ->mkwrite, should the page be mmaped, so gfs2 must
> already cope with a dirty page for inline data.

I'd suggest we still disable tail-packing inline for buffered write
path until some real user for testing. I can see some (maybe) page
writeback, inode writeback and inline converting cases which is
somewhat complicated than just update like this.

I suggest it could be implemented with some real users, at least it can
provide the real write pattern and paths for testing. I will send the
next version like my previous version to disable it until some real fs
user cares and works out a real pattern.

Thanks,
Gao Xiang
diff mbox series

Patch

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 87ccb3438bec..a3003f3e4d0e 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -205,25 +205,35 @@  struct iomap_readpage_ctx {
 	struct readahead_control *rac;
 };
 
-static void
+static int
 iomap_read_inline_data(struct inode *inode, struct page *page,
-		struct iomap *iomap)
+		struct iomap *iomap, loff_t pos)
 {
-	size_t size = i_size_read(inode);
+	unsigned int size;
 	void *addr;
 
 	if (PageUptodate(page))
-		return;
+		return PAGE_SIZE;
 
 	BUG_ON(page_has_private(page));
-	BUG_ON(page->index);
-	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
 
+	/* only support page-aligned tailpacking for now */
+	if (WARN_ON_ONCE(offset_in_page(pos)))
+		return -EIO;
+	/*
+	 * iomap->inline_data is a kernel-mapped memory page, so we must
+	 * terminate the read at the end of that page.
+	 */
+	if (WARN_ON_ONCE(iomap->length > PAGE_SIZE -
+			 offset_in_page(iomap->inline_data)))
+		return -EIO;
+	size = iomap->length + iomap->offset - pos;
 	addr = kmap_atomic(page);
-	memcpy(addr, iomap->inline_data, size);
+	memcpy(addr, iomap_inline_buf(iomap, pos), size);
 	memset(addr + size, 0, PAGE_SIZE - size);
 	kunmap_atomic(addr);
 	SetPageUptodate(page);
+	return PAGE_SIZE;
 }
 
 static inline bool iomap_block_needs_zeroing(struct inode *inode,
@@ -246,11 +256,8 @@  iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 	unsigned poff, plen;
 	sector_t sector;
 
-	if (iomap->type == IOMAP_INLINE) {
-		WARN_ON_ONCE(pos);
-		iomap_read_inline_data(inode, page, iomap);
-		return PAGE_SIZE;
-	}
+	if (iomap->type == IOMAP_INLINE)
+		return iomap_read_inline_data(inode, page, iomap, pos);
 
 	/* zero post-eof blocks as the page may be mapped */
 	iop = iomap_page_create(inode, page);
@@ -589,6 +596,16 @@  __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
 	return 0;
 }
 
+static int iomap_write_begin_inline(struct inode *inode,
+		struct page *page, struct iomap *srcmap)
+{
+	/* needs more work for the tailpacking case, disable for now */
+	if (WARN_ON_ONCE(srcmap->offset != 0))
+		return -EIO;
+	iomap_read_inline_data(inode, page, srcmap, 0);
+	return 0;
+}
+
 static int
 iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
@@ -618,7 +635,7 @@  iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	}
 
 	if (srcmap->type == IOMAP_INLINE)
-		iomap_read_inline_data(inode, page, srcmap);
+		status = iomap_write_begin_inline(inode, page, srcmap);
 	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
 		status = __block_write_begin_int(page, pos, len, NULL, srcmap);
 	else
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 9398b8c31323..f9253cae5b24 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -379,22 +379,29 @@  iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
 {
 	struct iov_iter *iter = dio->submit.iter;
 	size_t copied;
+	void *dst = iomap_inline_buf(iomap, pos);
 
-	BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
+	/*
+	 * iomap->inline_data is a kernel-mapped memory page, so we must
+	 * terminate the write at the end of that page.
+	 */
+	if (WARN_ON_ONCE(length > PAGE_SIZE -
+			 offset_in_page(iomap->inline_data)))
+		return -EIO;
 
 	if (dio->flags & IOMAP_DIO_WRITE) {
 		loff_t size = inode->i_size;
 
 		if (pos > size)
-			memset(iomap->inline_data + size, 0, pos - size);
-		copied = copy_from_iter(iomap->inline_data + pos, length, iter);
+			memset(iomap_inline_buf(iomap, size), 0, pos - size);
+		copied = copy_from_iter(dst, length, iter);
 		if (copied) {
 			if (pos + copied > size)
 				i_size_write(inode, pos + copied);
 			mark_inode_dirty(inode);
 		}
 	} else {
-		copied = copy_to_iter(iomap->inline_data + pos, length, iter);
+		copied = copy_to_iter(dst, length, iter);
 	}
 	dio->size += copied;
 	return copied;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 479c1da3e221..fb6934943eeb 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -97,6 +97,12 @@  iomap_sector(struct iomap *iomap, loff_t pos)
 	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
 }
 
+static inline void *
+iomap_inline_buf(const struct iomap *iomap, loff_t pos)
+{
+	return iomap->inline_data + pos - iomap->offset;
+}
+
 /*
  * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
  * and page_done will be called for each page written to.  This only applies to