diff mbox series

[03/11] xfs: convert partially written rt file extents to completely written

Message ID 173405125794.1184063.17925337081966040081.stgit@frogsfrogsfrogs (mailing list archive)
State New
Headers show
Series [01/11] vfs: explicitly pass the block size to the remap prep function | expand

Commit Message

Darrick J. Wong Dec. 13, 2024, 1:22 a.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

Create a utility function to convert the partially written extents of a
realtime file to be completely written.  In other words, if rextsize==7
and only block 6 is unwritten, these functions will zero out block 6 and
convert the mapping to written so that the entire 7-block allocation
unit can be remapped in a single operation.  This is required for any
rt file remapping activities that do not use log items to restart
interrupted operations.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
 fs/xfs/libxfs/xfs_rtbitmap.h |   12 +++
 fs/xfs/xfs_bmap_util.c       |  182 ++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_bmap_util.h       |    7 ++
 fs/xfs/xfs_trace.h           |   11 ++-
 4 files changed, 208 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h
index 22e5d9cd95f47c..89eb1e42128b38 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.h
+++ b/fs/xfs/libxfs/xfs_rtbitmap.h
@@ -157,6 +157,18 @@  xfs_rtb_to_rtxoff(
 	return do_div(rtbno, mp->m_sb.sb_rextsize);
 }
 
+/* Return the offset of a file block offset within an rt extent. */
+static inline xfs_extlen_t
+xfs_fileoff_to_rtxoff(
+	struct xfs_mount	*mp,
+	xfs_fileoff_t		off)
+{
+	if (likely(mp->m_rtxblklog >= 0))
+		return off & mp->m_rtxblkmask;
+
+	return do_div(off, mp->m_sb.sb_rextsize);
+}
+
 /* Round this file block offset up to the nearest rt extent size. */
 static inline xfs_rtblock_t
 xfs_fileoff_roundup_rtx(
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 0836fea2d6d814..3229b756f33780 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1726,3 +1726,185 @@  xfs_swap_extents(
 	xfs_trans_cancel(tp);
 	goto out_unlock_ilock;
 }
+
+#ifdef CONFIG_XFS_RT
+/*
+ * Decide if this is an unwritten extent that isn't aligned to an allocation
+ * unit boundary.
+ *
+ * If it is, shorten the mapping to the end of the allocation unit so that
+ * we're ready to convert all the mappings for this allocation unit to a zeroed
+ * written extent.  If not, return false.
+ */
+static inline bool
+xfs_want_convert_rtbigalloc_mapping(
+	struct xfs_mount	*mp,
+	struct xfs_bmbt_irec	*irec)
+{
+	xfs_fileoff_t		rext_next;
+	xfs_extlen_t		modoff, modcnt;
+
+	if (irec->br_state != XFS_EXT_UNWRITTEN)
+		return false;
+
+	modoff = xfs_fileoff_to_rtxoff(mp, irec->br_startoff);
+	if (modoff == 0) {
+		xfs_rtbxlen_t	rexts;
+
+		rexts = xfs_blen_to_rtbxlen(mp, irec->br_blockcount);
+		modcnt = xfs_blen_to_rtxoff(mp, irec->br_blockcount);
+		if (rexts > 0) {
+			/*
+			 * Unwritten mapping starts at an rt extent boundary
+			 * and is longer than one rt extent.  Round the length
+			 * down to the nearest extent but don't select it for
+			 * conversion.
+			 */
+			irec->br_blockcount -= modcnt;
+			modcnt = 0;
+		}
+
+		/* Unwritten mapping is perfectly aligned, do not convert. */
+		if (modcnt == 0)
+			return false;
+	}
+
+	/*
+	 * Unaligned and unwritten; trim to the current rt extent and select it
+	 * for conversion.
+	 */
+	rext_next = (irec->br_startoff - modoff) + mp->m_sb.sb_rextsize;
+	xfs_trim_extent(irec, irec->br_startoff, rext_next - irec->br_startoff);
+	return true;
+}
+
+/*
+ * Find an unwritten extent in the given file range, zero it, and convert the
+ * mapping to written.  Adjust the scan cursor on the way out.
+ */
+STATIC int
+xfs_convert_rtbigalloc_mapping(
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		*offp,
+	xfs_fileoff_t		endoff)
+{
+	struct xfs_bmbt_irec	irec;
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	unsigned int		resblks;
+	int			nmap;
+	int			error;
+
+	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 1);
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
+	if (error)
+		return error;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, 0);
+
+	/*
+	 * Read the mapping.  If we find an unwritten extent that isn't aligned
+	 * to an allocation unit...
+	 */
+retry:
+	nmap = 1;
+	error = xfs_bmapi_read(ip, *offp, endoff - *offp, &irec, &nmap, 0);
+	if (error)
+		goto out_cancel;
+	ASSERT(nmap == 1);
+	ASSERT(irec.br_startoff == *offp);
+	if (!xfs_want_convert_rtbigalloc_mapping(mp, &irec)) {
+		*offp = irec.br_startoff + irec.br_blockcount;
+		if (*offp >= endoff)
+			goto out_cancel;
+		goto retry;
+	}
+
+	/*
+	 * ...then write zeroes to the space and change the mapping state to
+	 * written.  This consolidates the mappings for this allocation unit.
+	 */
+	nmap = 1;
+	error = xfs_bmapi_write(tp, ip, irec.br_startoff, irec.br_blockcount,
+			XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &irec, &nmap);
+	if (error)
+		goto out_cancel;
+	error = xfs_trans_commit(tp);
+	if (error)
+		goto out_unlock;
+
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+	/*
+	 * If an unwritten mapping was returned, something is very wrong.
+	 * If no mapping was returned, then bmapi_write thought it performed
+	 * a short allocation, which should be impossible since we previously
+	 * queried the mapping and haven't cycled locks since then.  Either
+	 * way, fail the operation.
+	 */
+	if (nmap == 0 || irec.br_state != XFS_EXT_NORM) {
+		ASSERT(nmap != 0);
+		ASSERT(irec.br_state == XFS_EXT_NORM);
+		return -EIO;
+	}
+
+	/* Advance the cursor to the end of the mapping returned. */
+	*offp = irec.br_startoff + irec.br_blockcount;
+	return 0;
+
+out_cancel:
+	xfs_trans_cancel(tp);
+out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return error;
+}
+
+/*
+ * Prepare a file with multi-fsblock allocation units for a remapping.
+ *
+ * File allocation units (AU) must be fully mapped to the data fork.  If the
+ * space in an AU have not been fully written, there can be multiple extent
+ * mappings (e.g. mixed written and unwritten blocks) to the AU.  If the log
+ * does not have a means to ensure that all remappings for a given AU will be
+ * completed even if the fs goes down, we must maintain the above constraint in
+ * another way.
+ *
+ * Convert the unwritten parts of an AU to written by writing zeroes to the
+ * storage and flipping the mapping.  Once this completes, there will be a
+ * single mapping for the entire AU, and we can proceed with the remapping
+ * operation.
+ *
+ * Callers must ensure that there are no dirty pages in the given range.
+ */
+int
+xfs_convert_rtbigalloc_file_space(
+	struct xfs_inode	*ip,
+	loff_t			pos,
+	uint64_t		len)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_fileoff_t		off;
+	xfs_fileoff_t		endoff;
+	int			error;
+
+	if (!xfs_inode_has_bigrtalloc(ip))
+		return 0;
+
+	off = xfs_fileoff_rounddown_rtx(mp, XFS_B_TO_FSBT(mp, pos));
+	endoff = xfs_fileoff_roundup_rtx(mp, XFS_B_TO_FSB(mp, pos + len));
+
+	trace_xfs_convert_rtbigalloc_file_space(ip, pos, len);
+
+	while (off < endoff) {
+		if (fatal_signal_pending(current))
+			return -EINTR;
+
+		error = xfs_convert_rtbigalloc_mapping(ip, &off, endoff);
+		if (error)
+			return error;
+	}
+
+	return 0;
+}
+#endif /* CONFIG_XFS_RT */
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index b29760d36e1ab1..3834962670449f 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -79,4 +79,11 @@  int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
 int	xfs_flush_unmap_range(struct xfs_inode *ip, xfs_off_t offset,
 			      xfs_off_t len);
 
+#ifdef CONFIG_XFS_RT
+int xfs_convert_rtbigalloc_file_space(struct xfs_inode *ip, loff_t pos,
+		uint64_t len);
+#else
+# define xfs_convert_rtbigalloc_file_space(ip, pos, len)	(-EOPNOTSUPP)
+#endif
+
 #endif	/* __XFS_BMAP_UTIL_H__ */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 4fe689410eb6ae..8af9c38bea152f 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1555,7 +1555,7 @@  DEFINE_IMAP_EVENT(xfs_iomap_alloc);
 DEFINE_IMAP_EVENT(xfs_iomap_found);
 
 DECLARE_EVENT_CLASS(xfs_simple_io_class,
-	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
+	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, u64 count),
 	TP_ARGS(ip, offset, count),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
@@ -1563,7 +1563,7 @@  DECLARE_EVENT_CLASS(xfs_simple_io_class,
 		__field(loff_t, isize)
 		__field(loff_t, disize)
 		__field(loff_t, offset)
-		__field(size_t, count)
+		__field(u64, count)
 	),
 	TP_fast_assign(
 		__entry->dev = VFS_I(ip)->i_sb->s_dev;
@@ -1574,7 +1574,7 @@  DECLARE_EVENT_CLASS(xfs_simple_io_class,
 		__entry->count = count;
 	),
 	TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx "
-		  "pos 0x%llx bytecount 0x%zx",
+		  "pos 0x%llx bytecount 0x%llx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->isize,
@@ -1585,7 +1585,7 @@  DECLARE_EVENT_CLASS(xfs_simple_io_class,
 
 #define DEFINE_SIMPLE_IO_EVENT(name)	\
 DEFINE_EVENT(xfs_simple_io_class, name,	\
-	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),	\
+	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, u64 count),	\
 	TP_ARGS(ip, offset, count))
 DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
 DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
@@ -3971,6 +3971,9 @@  TRACE_EVENT(xfs_ioctl_clone,
 /* unshare tracepoints */
 DEFINE_SIMPLE_IO_EVENT(xfs_reflink_unshare);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_unshare_error);
+#ifdef CONFIG_XFS_RT
+DEFINE_SIMPLE_IO_EVENT(xfs_convert_rtbigalloc_file_space);
+#endif /* CONFIG_XFS_RT */
 
 /* copy on write */
 DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared);