@@ -1300,7 +1300,7 @@ static int gfs2_block_zero_range(struct inode *inode, loff_t from,
unsigned int length)
{
BUG_ON(current->journal_info);
- return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops);
+ return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops, 0);
}
#define GFS2_JTRUNC_REVOKES 8192
@@ -1350,7 +1350,8 @@ static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i)
return filemap_write_and_wait_range(mapping, i->pos, end);
}
-static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
+static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
+ unsigned int zeroing_flags)
{
loff_t pos = iter->pos;
loff_t length = iomap_length(iter);
@@ -1363,6 +1364,18 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
size_t bytes = min_t(u64, SIZE_MAX, length);
bool ret;
+ /*
+ * If we've gone past EOF and have a written mapping, and the
+ * filesystem supports written mappings past EOF, skip the rest
+ * of the range. We can't write that back anyway.
+ */
+ if (pos > iter->inode->i_size &&
+ (zeroing_flags & IOMAP_ZERO_MAPPED_BEYOND_EOF)) {
+ written += length;
+ length = 0;
+ break;
+ }
+
status = iomap_write_begin(iter, pos, bytes, &folio);
if (status)
return status;
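
To make the new early exit concrete, here is a minimal userspace model of the accounting above; it is my own sketch, not kernel code, with the folio size fixed at 4 KiB and zero_iter_model a hypothetical name. Once pos passes i_size with the flag set, the remaining length is credited as processed without touching a folio, so the iterator sees the whole range consumed and stops.

    #include <stdio.h>

    #define IOMAP_ZERO_MAPPED_BEYOND_EOF (1U << 0)

    static long long zero_iter_model(long long pos, long long length,
                                     long long i_size, unsigned int flags)
    {
            long long written = 0;

            while (length > 0) {
                    long long bytes = length < 4096 ? length : 4096;

                    if (pos > i_size &&
                        (flags & IOMAP_ZERO_MAPPED_BEYOND_EOF)) {
                            written += length; /* credit the tail, untouched */
                            break;
                    }
                    /* the real iomap_zero_iter zeroes one folio here */
                    pos += bytes;
                    length -= bytes;
                    written += bytes;
            }
            return written;
    }

    int main(void)
    {
            /* zero 16 KiB at offset 8 KiB with i_size = 16 KiB: the check
             * is strictly '>', so the chunk starting exactly at EOF is
             * still zeroed; everything after it is credited and skipped */
            printf("%lld bytes reported\n",
                   zero_iter_model(8192, 16384, 16384,
                                   IOMAP_ZERO_MAPPED_BEYOND_EOF));
            return 0;
    }
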
@@ -1395,7 +1408,7 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
int
iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
- const struct iomap_ops *ops)
+ const struct iomap_ops *ops, unsigned int zeroing_flags)
{
struct iomap_iter iter = {
.inode = inode,
@@ -1424,7 +1437,8 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
filemap_range_needs_writeback(mapping, pos, pos + plen - 1)) {
iter.len = plen;
while ((ret = iomap_iter(&iter, ops)) > 0)
- iter.processed = iomap_zero_iter(&iter, did_zero);
+ iter.processed = iomap_zero_iter(&iter, did_zero,
+ zeroing_flags);
iter.len = len - (iter.pos - pos);
if (ret || !iter.len)
@@ -1453,7 +1467,8 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
continue;
}
- iter.processed = iomap_zero_iter(&iter, did_zero);
+ iter.processed = iomap_zero_iter(&iter, did_zero,
+ zeroing_flags);
}
return ret;
}
@@ -1469,7 +1484,7 @@ iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
/* Block boundary? Nothing to do */
if (!off)
return 0;
- return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops);
+ return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops, 0);
}
EXPORT_SYMBOL_GPL(iomap_truncate_page);
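
For reference, in the unmodified lines just above this hunk, off is pos modulo the block size (pos & (blocksize - 1)); a quick userspace check of the blocksize - off arithmetic, purely illustrative:

    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int blocksize = 4096;  /* must be a power of two */
            long long pos = 10000;          /* new EOF, mid-block */
            unsigned int off = pos & (blocksize - 1);  /* 1808 */

            assert(off != 0);  /* on a boundary there is nothing to zero */
            /* zeroes offsets 10000..12287, up to the next block boundary */
            printf("zero %u bytes starting at %lld\n", blocksize - off, pos);
            return 0;
    }

Prints "zero 2288 bytes starting at 10000": only the tail of the partial block at the new EOF is zeroed, which is why a pos already on a block boundary returns early.
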
@@ -1490,14 +1490,39 @@ xfs_zero_range(
bool *did_zero)
{
struct inode *inode = VFS_I(ip);
+ unsigned int zeroing_flags = 0;
xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
if (IS_DAX(inode))
return dax_zero_range(inode, pos, len, did_zero,
&xfs_dax_write_iomap_ops);
+
+ /*
+ * Files with allocation units larger than the fsblock size can share
+ * zeroed written blocks beyond EOF if EOF lands in the middle of an
+ * allocation unit, because sharing whole units keeps the refcounting
+ * code simple. Those shared post-EOF blocks are written and already
+ * zero on disk, which also keeps the post-EOF part of any CoW staging
+ * extent zeroed. Since writeback never touches folios beyond EOF, we
+ * tell iomap to skip such mappings during pagecache zeroing rather
+ * than dirty folios that can never be written back.
+ *
+ * Alternate designs could be: (a) don't allow sharing of an allocation
+ * unit that spans EOF because of the unwritten blocks; (b) rewrite the
+ * reflink code to allow shared unwritten extents in this one corner
+ * case; or (c) insert zeroed pages into the pagecache to get around
+ * the checks in iomap_zero_range.
+ *
+ * However, skipping written mappings beyond EOF was chosen because it
+ * most closely resembles the allocation unit == 1 fsblock case, where
+ * everything beyond EOF is a hole or unwritten and likewise skipped.
+ */
+ if (xfs_inode_has_bigrtalloc(ip) && xfs_has_reflink(ip->i_mount))
+ zeroing_flags |= IOMAP_ZERO_MAPPED_BEYOND_EOF;
+
return iomap_zero_range(inode, pos, len, did_zero,
- &xfs_buffered_write_iomap_ops);
+ &xfs_buffered_write_iomap_ops, zeroing_flags);
}
int
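
As a back-of-the-envelope illustration of the "EOF in the middle of an allocation unit" case that the comment above describes, here is a userspace toy with a made-up 64 KiB allocation unit; the numbers are mine, not from this patch:

    #include <stdio.h>

    int main(void)
    {
            long long alloc_unit = 16 * 4096;  /* hypothetical bigrtalloc unit */
            long long i_size = 100000;         /* EOF lands mid-unit */
            /* round EOF up to the end of its allocation unit */
            long long unit_end =
                    ((i_size + alloc_unit - 1) / alloc_unit) * alloc_unit;

            printf("%lld bytes beyond EOF stay written and zeroed\n",
                   unit_end - i_size);  /* 131072 - 100000 = 31072 */
            return 0;
    }

Those 31072 bytes are exactly the written-but-post-EOF blocks that the new flag tells iomap_zero_range to leave alone.
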
@@ -306,7 +306,11 @@ bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio);
int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
const struct iomap_ops *ops);
int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
- bool *did_zero, const struct iomap_ops *ops);
+ bool *did_zero, const struct iomap_ops *ops,
+ unsigned int zeroing_flags);
+/* skip zeroing of written mappings that the fs allows beyond EOF */
+#define IOMAP_ZERO_MAPPED_BEYOND_EOF (1U << 0)
+
int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
const struct iomap_ops *ops);
vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf,
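
Putting the revised prototype to use, a hypothetical filesystem caller would now choose the flag explicitly; everything named myfs_* below is illustrative and not part of this patch:

    /* Hypothetical caller sketch: set IOMAP_ZERO_MAPPED_BEYOND_EOF only
     * when this inode may legitimately carry written, zeroed mappings
     * past EOF; otherwise pass 0 to keep the historical behavior. */
    static int myfs_zero_range(struct inode *inode, loff_t pos, loff_t len,
                               bool *did_zero, bool written_past_eof_ok)
    {
            unsigned int zeroing_flags = 0;

            if (written_past_eof_ok)
                    zeroing_flags |= IOMAP_ZERO_MAPPED_BEYOND_EOF;

            return iomap_zero_range(inode, pos, len, did_zero,
                                    &myfs_iomap_ops, zeroing_flags);
    }

Callers that never leave written mappings past EOF simply pass 0, which preserves the old behavior, exactly as the gfs2 and iomap_truncate_page hunks above do.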