@@ -584,7 +584,7 @@ static int iomap_write_begin_inline(const struct iomap_iter *iter,
return iomap_read_inline_data(iter, folio);
}
-static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
+static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
size_t len, struct folio **foliop)
{
const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
@@ -618,6 +618,27 @@ static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
status = (iter->flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOMEM;
goto out_no_page;
}
+
+ /*
+ * Now that we have a locked folio, check that the iomap we have cached
+ * is not stale before we do anything with it. The inode extent mapping
+ * can change due to concurrent IO in flight (e.g. IOMAP_UNWRITTEN state
+ * can change, and memory reclaim could have reclaimed a previously
+ * partially written page at this index after IO completion but before
+ * this write reaches this file offset). Acting on a stale iomap could
+ * do the wrong thing here (zero a page range incorrectly or fail to
+ * zero it) and corrupt data.
+ */
+ if (page_ops && page_ops->iomap_valid) {
+ bool iomap_valid = page_ops->iomap_valid(iter->inode,
+ &iter->iomap);
+ if (!iomap_valid) {
+ iter->iomap.flags |= IOMAP_F_STALE;
+ status = 0;
+ goto out_unlock;
+ }
+ }
+
if (pos + len > folio_pos(folio) + folio_size(folio))
len = folio_pos(folio) + folio_size(folio) - pos;
@@ -773,6 +794,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
status = iomap_write_begin(iter, pos, bytes, &folio);
if (unlikely(status))
break;
+ if (iter->iomap.flags & IOMAP_F_STALE)
+ break;
page = folio_file_page(folio, pos >> PAGE_SHIFT);
if (mapping_writably_mapped(mapping))
@@ -1082,6 +1105,8 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter)
status = iomap_write_begin(iter, pos, bytes, &folio);
if (unlikely(status))
return status;
+ if (iter->iomap.flags & IOMAP_F_STALE)
+ break;
status = iomap_write_end(iter, pos, bytes, bytes, folio);
if (WARN_ON_ONCE(status == 0))
@@ -1137,6 +1162,8 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
status = iomap_write_begin(iter, pos, bytes, &folio);
if (status)
return status;
+ if (iter->iomap.flags & IOMAP_F_STALE)
+ break;
offset = offset_in_folio(folio, pos);
if (bytes > folio_size(folio) - offset)
@@ -7,12 +7,28 @@
#include <linux/iomap.h>
#include "trace.h"
+/*
+ * Advance to the next range we need to map.
+ *
+ * If the iomap is marked IOMAP_F_STALE, it means the existing map was not fully
+ * processed - it was aborted because the extent the iomap spanned may have been
+ * changed during the operation. In this case, the iteration behaviour is to
+ * remap the unprocessed range of the iter, and that means we may need to remap
+ * even when we've made no progress (i.e. iter->processed = 0). Hence the
+ * "finished iterating" case needs to distinguish between
+ * (processed = 0) meaning we are done and (processed = 0 && stale) meaning we
+ * need to remap the entire remaining range.
+ */
static inline int iomap_iter_advance(struct iomap_iter *iter)
{
+ bool stale = iter->iomap.flags & IOMAP_F_STALE;
+
/* handle the previous iteration (if any) */
if (iter->iomap.length) {
- if (iter->processed <= 0)
+ if (iter->processed < 0)
return iter->processed;
+ if (!iter->processed && !stale)
+ return 0;
if (WARN_ON_ONCE(iter->processed > iomap_length(iter)))
return -EIO;
iter->pos += iter->processed;
@@ -33,6 +49,7 @@ static inline void iomap_iter_done(struct iomap_iter *iter)
WARN_ON_ONCE(iter->iomap.offset > iter->pos);
WARN_ON_ONCE(iter->iomap.length == 0);
WARN_ON_ONCE(iter->iomap.offset + iter->iomap.length <= iter->pos);
+ WARN_ON_ONCE(iter->iomap.flags & IOMAP_F_STALE);
trace_iomap_iter_dstmap(iter->inode, &iter->iomap);
if (iter->srcmap.type != IOMAP_HOLE)
@@ -49,26 +49,35 @@ struct vm_fault;
*
* IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of
* buffer heads for this mapping.
+ *
+ * IOMAP_F_XATTR indicates that the iomap is for an extended attribute extent
+ * rather than a file data extent.
*/
-#define IOMAP_F_NEW 0x01
-#define IOMAP_F_DIRTY 0x02
-#define IOMAP_F_SHARED 0x04
-#define IOMAP_F_MERGED 0x08
-#define IOMAP_F_BUFFER_HEAD 0x10
-#define IOMAP_F_ZONE_APPEND 0x20
+#define IOMAP_F_NEW (1U << 0)
+#define IOMAP_F_DIRTY (1U << 1)
+#define IOMAP_F_SHARED (1U << 2)
+#define IOMAP_F_MERGED (1U << 3)
+#define IOMAP_F_BUFFER_HEAD (1U << 4)
+#define IOMAP_F_ZONE_APPEND (1U << 5)
+#define IOMAP_F_XATTR (1U << 6)
/*
* Flags set by the core iomap code during operations:
*
* IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size
* has changed as the result of this write operation.
+ *
+ * IOMAP_F_STALE indicates that the iomap is no longer valid and the file
+ * range it covers needs to be remapped by the higher-level code before the
+ * operation can proceed.
*/
-#define IOMAP_F_SIZE_CHANGED 0x100
+#define IOMAP_F_SIZE_CHANGED (1U << 8)
+#define IOMAP_F_STALE (1U << 9)
/*
* Flags from 0x1000 up are for file system specific usage:
*/
-#define IOMAP_F_PRIVATE 0x1000
+#define IOMAP_F_PRIVATE (1U << 12)
/*
@@ -89,6 +98,7 @@ struct iomap {
void *inline_data;
void *private; /* filesystem private */
const struct iomap_page_ops *page_ops;
+ u64 validity_cookie; /* used with .iomap_valid() */
};
static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos)
@@ -128,6 +138,23 @@ struct iomap_page_ops {
int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len);
void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
struct page *page);
+
+ /*
+ * Check that the cached iomap still maps correctly to the filesystem's
+ * internal extent map. FS internal extent maps can change while iomap
+ * is iterating a cached iomap, so this hook allows iomap to detect that
+ * the iomap needs to be refreshed during a long running write
+ * operation.
+ *
+ * The filesystem can store internal state (e.g. a sequence number) in
+ * iomap->validity_cookie when the iomap is first mapped to be able to
+ * detect changes between mapping time and whenever .iomap_valid() is
+ * called.
+ *
+ * This is called by the iomap code while it holds the lock on the folio
+ * covering the file position being operated on.
+ */
+ bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap);
};
/*