[02/11] iomap: allow zeroing of written extents beyond EOF

Message ID 173405125776.1184063.5414430767804356851.stgit@frogsfrogsfrogs
State New
Series [01/11] vfs: explicitly pass the block size to the remap prep function

Commit Message

Darrick J. Wong Dec. 13, 2024, 1:21 a.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

In commit eb65540aa9fc82 ("iomap: warn on zero range of a post-eof
folio"), we established that iomap_zero_range cannot dirty folios beyond
EOF because writeback will ignore those dirty folios.

However, XFS can only handle copy on write of entire file allocation
units.  For reflink on a realtime volume whose allocation unit is larger
than a single fsblock, if EOF falls in the middle of an allocation unit,
we must use the pagecache to stage the out-of-place write, even if that
means keeping (zeroed) dirty pagecache beyond EOF.

To support this, the writeback path already knows how to extend the
writeback range to align with an allocation unit, and it successfully
finds those dirty post-EOF folios.  Therefore, disable the post-EOF
check for this particular situation.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
 fs/gfs2/bmap.c         |    2 +-
 fs/iomap/buffered-io.c |   25 ++++++++++++++++++++-----
 fs/xfs/xfs_iomap.c     |   27 ++++++++++++++++++++++++++-
 include/linux/iomap.h  |    6 +++++-
 4 files changed, 52 insertions(+), 8 deletions(-)
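
For context, the new zeroing_flags argument only changes behavior for a
filesystem that opts in; every other caller passes 0.  Below is a minimal
caller sketch modeled on the xfs_zero_range() and gfs2 hunks in this patch:
iomap_zero_range() and IOMAP_ZERO_MAPPED_BEYOND_EOF come from the patch
itself, while the wrapper and its cow_spans_eof parameter are purely
illustrative.

#include <linux/iomap.h>

/*
 * Hypothetical wrapper showing the intended calling convention: a
 * filesystem whose copy-on-write staging extents can cross EOF
 * (allocation unit > fsblock) sets IOMAP_ZERO_MAPPED_BEYOND_EOF;
 * everyone else passes 0 and keeps the existing behavior.
 */
static int example_zero_range(struct inode *inode, loff_t pos, loff_t len,
			      bool *did_zero, const struct iomap_ops *ops,
			      bool cow_spans_eof)
{
	unsigned int zeroing_flags = 0;

	if (cow_spans_eof)
		zeroing_flags |= IOMAP_ZERO_MAPPED_BEYOND_EOF;

	return iomap_zero_range(inode, pos, len, did_zero, ops,
				zeroing_flags);
}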

Patch

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 1795c4e8dbf66a..ce9293c916363e 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1300,7 +1300,7 @@  static int gfs2_block_zero_range(struct inode *inode, loff_t from,
 				 unsigned int length)
 {
 	BUG_ON(current->journal_info);
-	return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops);
+	return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops, 0);
 }
 
 #define GFS2_JTRUNC_REVOKES 8192
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 955f19e27e47c5..4e851e9c2a1002 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1350,7 +1350,8 @@  static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i)
 	return filemap_write_and_wait_range(mapping, i->pos, end);
 }
 
-static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
+static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
+		unsigned zeroing_flags)
 {
 	loff_t pos = iter->pos;
 	loff_t length = iomap_length(iter);
@@ -1363,6 +1364,18 @@  static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
 		size_t bytes = min_t(u64, SIZE_MAX, length);
 		bool ret;
 
+		/*
+		 * If we've gone past EOF and have a written mapping, and the
+		 * filesystem supports written mappings past EOF, skip the rest
+		 * of the range.  We can't write that back anyway.
+		 */
+		if (pos > iter->inode->i_size &&
+		    (zeroing_flags & IOMAP_ZERO_MAPPED_BEYOND_EOF)) {
+			written += length;
+			length = 0;
+			break;
+		}
+
 		status = iomap_write_begin(iter, pos, bytes, &folio);
 		if (status)
 			return status;
@@ -1395,7 +1408,7 @@  static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
 
 int
 iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
-		const struct iomap_ops *ops)
+		const struct iomap_ops *ops, unsigned zeroing_flags)
 {
 	struct iomap_iter iter = {
 		.inode		= inode,
@@ -1424,7 +1437,8 @@  iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
 	    filemap_range_needs_writeback(mapping, pos, pos + plen - 1)) {
 		iter.len = plen;
 		while ((ret = iomap_iter(&iter, ops)) > 0)
-			iter.processed = iomap_zero_iter(&iter, did_zero);
+			iter.processed = iomap_zero_iter(&iter, did_zero,
+							 zeroing_flags);
 
 		iter.len = len - (iter.pos - pos);
 		if (ret || !iter.len)
@@ -1453,7 +1467,8 @@  iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
 			continue;
 		}
 
-		iter.processed = iomap_zero_iter(&iter, did_zero);
+		iter.processed = iomap_zero_iter(&iter, did_zero,
+						 zeroing_flags);
 	}
 	return ret;
 }
@@ -1469,7 +1484,7 @@  iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 	/* Block boundary? Nothing to do */
 	if (!off)
 		return 0;
-	return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops);
+	return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops, 0);
 }
 EXPORT_SYMBOL_GPL(iomap_truncate_page);
 
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 50fa3ef89f6c98..b7d0dfd5fd3117 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1490,14 +1490,39 @@  xfs_zero_range(
 	bool			*did_zero)
 {
 	struct inode		*inode = VFS_I(ip);
+	unsigned int		zeroing_flags = 0;
 
 	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
 
 	if (IS_DAX(inode))
 		return dax_zero_range(inode, pos, len, did_zero,
 				      &xfs_dax_write_iomap_ops);
+
+	/*
+	 * Files with allocation units larger than the fsblock size can share
+	 * zeroed written blocks beyond EOF if the EOF is in the middle of an
+	 * allocation unit because it keeps the refcounting code simple.  We
+	 * therefore permit zeroing of pagecache for these post-EOF written
+	 * extents so that the blocks in the CoW staging extent beyond EOF are
+	 * all initialized to zero.
+	 *
+	 * Alternate designs could be: (a) don't allow sharing of an allocation
+	 * unit that spans EOF because of the unwritten blocks; (b) rewrite the
+	 * reflink code to allow shared unwritten extents in this one corner
+	 * case; or (c) insert zeroed pages into the pagecache to get around
+	 * the checks in iomap_zero_range.
+	 *
+	 * However, this design (allow zeroing of pagecache beyond EOF) was
+	 * chosen because it most closely resembles what we do for allocation
+	 * unit == 1 fsblock.  Note that for these files, we force writeback
+	 * of post-EOF folios to ensure that CoW always happens in units of
+	 * allocation units.
+	 */
+	if (xfs_inode_has_bigrtalloc(ip) && xfs_has_reflink(ip->i_mount))
+		zeroing_flags |= IOMAP_ZERO_MAPPED_BEYOND_EOF;
+
 	return iomap_zero_range(inode, pos, len, did_zero,
-				&xfs_buffered_write_iomap_ops);
+				&xfs_buffered_write_iomap_ops, zeroing_flags);
 }
 
 int
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 5675af6b740c27..31a5aa239aab1d 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -306,7 +306,11 @@  bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio);
 int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
 		const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
-		bool *did_zero, const struct iomap_ops *ops);
+		bool *did_zero, const struct iomap_ops *ops,
+		unsigned zeroing_flags);
+/* ignore written mappings allowed beyond EOF */
+#define IOMAP_ZERO_MAPPED_BEYOND_EOF	(1U << 0)
+
 int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 		const struct iomap_ops *ops);
 vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf,
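
For reviewers tracing the control flow, the behavior change reduces to the
single predicate added to iomap_zero_iter().  A standalone restatement for
reference (the helper name is made up; the fields and flag are taken from
the hunks above):

#include <linux/iomap.h>

/*
 * Hypothetical helper mirroring the early exit added to iomap_zero_iter()
 * in this patch: once the zeroing position has passed i_size and the
 * caller set IOMAP_ZERO_MAPPED_BEYOND_EOF, the remainder of the range is
 * accounted as processed without touching any more pagecache.
 */
static bool iomap_zero_skip_post_eof(const struct iomap_iter *iter,
				     loff_t pos, unsigned zeroing_flags)
{
	return pos > iter->inode->i_size &&
	       (zeroing_flags & IOMAP_ZERO_MAPPED_BEYOND_EOF);
}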