diff mbox series

[v3,10/12] xfs: Commit CoW-based atomic writes atomically

Message ID 20250227180813.1553404-11-john.g.garry@oracle.com (mailing list archive)
State New
Headers show
Series large atomic writes for xfs with CoW | expand

Commit Message

John Garry Feb. 27, 2025, 6:08 p.m. UTC
When completing a CoW-based write, each extent range mapping update is
covered by a separate transaction.

For a CoW-based atomic write, all mappings must be changed at once, so
complete such writes using a single transaction that covers the whole range.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 fs/xfs/xfs_file.c    |  5 ++++-
 fs/xfs/xfs_reflink.c | 49 ++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_reflink.h |  3 +++
 3 files changed, 56 insertions(+), 1 deletion(-)

Comments

Darrick J. Wong Feb. 28, 2025, 1:13 a.m. UTC | #1
On Thu, Feb 27, 2025 at 06:08:11PM +0000, John Garry wrote:
> When completing a CoW-based write, each extent range mapping update is
> covered by a separate transaction.
> 
> For a CoW-based atomic write, all mappings must be changed at once, so
> change to use a single transaction.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>

Looks good to me now,
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>

--D

> ---
>  fs/xfs/xfs_file.c    |  5 ++++-
>  fs/xfs/xfs_reflink.c | 49 ++++++++++++++++++++++++++++++++++++++++++++
>  fs/xfs/xfs_reflink.h |  3 +++
>  3 files changed, 56 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 76ea59c638c3..44e11c433569 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -527,7 +527,10 @@ xfs_dio_write_end_io(
>  	nofs_flag = memalloc_nofs_save();
>  
>  	if (flags & IOMAP_DIO_COW) {
> -		error = xfs_reflink_end_cow(ip, offset, size);
> +		if (iocb->ki_flags & IOCB_ATOMIC)
> +			error = xfs_reflink_end_atomic_cow(ip, offset, size);
> +		else
> +			error = xfs_reflink_end_cow(ip, offset, size);
>  		if (error)
>  			goto out;
>  	}
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index 97dc38841063..844e2b43357b 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
> @@ -987,6 +987,55 @@ xfs_reflink_end_cow(
>  		trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
>  	return error;
>  }
> +int
> +xfs_reflink_end_atomic_cow(
> +	struct xfs_inode		*ip,
> +	xfs_off_t			offset,
> +	xfs_off_t			count)
> +{
> +	xfs_fileoff_t			offset_fsb;
> +	xfs_fileoff_t			end_fsb;
> +	int				error = 0;
> +	struct xfs_mount		*mp = ip->i_mount;
> +	struct xfs_trans		*tp;
> +	unsigned int			resblks;
> +
> +	trace_xfs_reflink_end_cow(ip, offset, count);
> +
> +	offset_fsb = XFS_B_TO_FSBT(mp, offset);
> +	end_fsb = XFS_B_TO_FSB(mp, offset + count);
> +
> +	/*
> +	 * Each remapping operation could cause a btree split, so in the worst
> +	 * case that's one for each block.
> +	 */
> +	resblks = (end_fsb - offset_fsb) *
> +			XFS_NEXTENTADD_SPACE_RES(mp, 1, XFS_DATA_FORK);
> +
> +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
> +			XFS_TRANS_RESERVE, &tp);
> +	if (error)
> +		return error;
> +
> +	xfs_ilock(ip, XFS_ILOCK_EXCL);
> +	xfs_trans_ijoin(tp, ip, 0);
> +
> +	while (end_fsb > offset_fsb && !error) {
> +		error = xfs_reflink_end_cow_extent_locked(tp, ip, &offset_fsb,
> +				end_fsb);
> +	}
> +	if (error) {
> +		trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
> +		goto out_cancel;
> +	}
> +	error = xfs_trans_commit(tp);
> +	xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +	return error;
> +out_cancel:
> +	xfs_trans_cancel(tp);
> +	xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +	return error;
> +}
>  
>  /*
>   * Free all CoW staging blocks that are still referenced by the ondisk refcount
> diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
> index dfd94e51e2b4..4cb2ee53cd8d 100644
> --- a/fs/xfs/xfs_reflink.h
> +++ b/fs/xfs/xfs_reflink.h
> @@ -49,6 +49,9 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
>  		xfs_off_t count, bool cancel_real);
>  extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
>  		xfs_off_t count);
> +		int
> +xfs_reflink_end_atomic_cow(struct xfs_inode *ip, xfs_off_t offset,
> +		xfs_off_t count);

Nit: return type should be at column 0 and the name should be right
after.

int xfs_reflink_end_atomic_cow(struct xfs_inode *ip, xfs_off_t offset,
		xfs_off_t count);

With that fixed,
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>

--D

>  extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
>  extern loff_t xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
>  		struct file *file_out, loff_t pos_out, loff_t len,
> -- 
> 2.31.1
>
diff mbox series

Patch

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 76ea59c638c3..44e11c433569 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -527,7 +527,10 @@  xfs_dio_write_end_io(
 	nofs_flag = memalloc_nofs_save();
 
 	if (flags & IOMAP_DIO_COW) {
-		error = xfs_reflink_end_cow(ip, offset, size);
+		if (iocb->ki_flags & IOCB_ATOMIC)
+			error = xfs_reflink_end_atomic_cow(ip, offset, size);
+		else
+			error = xfs_reflink_end_cow(ip, offset, size);
 		if (error)
 			goto out;
 	}
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 97dc38841063..844e2b43357b 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -987,6 +987,55 @@  xfs_reflink_end_cow(
 		trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
 	return error;
 }
+
+/*
+ * End a CoW-based atomic write: remap the whole range in one transaction.
+ */
+int
+xfs_reflink_end_atomic_cow(
+	struct xfs_inode		*ip,
+	xfs_off_t			offset,
+	xfs_off_t			count)
+{
+	xfs_fileoff_t			offset_fsb;
+	xfs_fileoff_t			end_fsb;
+	int				error = 0;
+	struct xfs_mount		*mp = ip->i_mount;
+	struct xfs_trans		*tp;
+	unsigned int			resblks;
+
+	trace_xfs_reflink_end_cow(ip, offset, count);
+
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	end_fsb = XFS_B_TO_FSB(mp, offset + count);
+
+	/*
+	 * Each remapping operation could cause a btree split, so in the worst
+	 * case that's one for each block.
+	 */
+	resblks = (end_fsb - offset_fsb) *
+			XFS_NEXTENTADD_SPACE_RES(mp, 1, XFS_DATA_FORK);
+
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
+			XFS_TRANS_RESERVE, &tp);
+	if (error)
+		return error;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, 0);
+
+	while (end_fsb > offset_fsb && !error)
+		error = xfs_reflink_end_cow_extent_locked(tp, ip, &offset_fsb,
+				end_fsb);
+	if (error) {
+		trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
+		xfs_trans_cancel(tp);
+	} else {
+		error = xfs_trans_commit(tp);
+	}
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return error;
+}
 
 /*
  * Free all CoW staging blocks that are still referenced by the ondisk refcount
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index dfd94e51e2b4..4cb2ee53cd8d 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -49,6 +49,9 @@  extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
 		xfs_off_t count, bool cancel_real);
 extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
 		xfs_off_t count);
+/* Complete a CoW-based atomic write using a single transaction. */
+int xfs_reflink_end_atomic_cow(struct xfs_inode *ip, xfs_off_t offset,
+		xfs_off_t count);
 extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
 extern loff_t xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
 		struct file *file_out, loff_t pos_out, loff_t len,