diff mbox series

[v6,5/7] xfs: Support atomic write for statx

Message ID 20240930125438.2501050-6-john.g.garry@oracle.com (mailing list archive)
State New
Headers show
Series block atomic writes for xfs | expand

Commit Message

John Garry Sept. 30, 2024, 12:54 p.m. UTC
Support providing info on atomic write unit min and max for an inode.

For simplicity, currently we limit the min at the FS block size. As for
max, we limit also at FS block size, as there is no current method to
guarantee extent alignment or granularity for regular files.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 fs/xfs/xfs_inode.h | 17 +++++++++++++++++
 fs/xfs/xfs_iops.c  | 24 ++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

Comments

Darrick J. Wong Sept. 30, 2024, 4:37 p.m. UTC | #1
On Mon, Sep 30, 2024 at 12:54:36PM +0000, John Garry wrote:
> Support providing info on atomic write unit min and max for an inode.
> 
> For simplicity, currently we limit the min at the FS block size. As for
> max, we limit also at FS block size, as there is no current method to
> guarantee extent alignment or granularity for regular files.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>  fs/xfs/xfs_inode.h | 17 +++++++++++++++++
>  fs/xfs/xfs_iops.c  | 24 ++++++++++++++++++++++++
>  2 files changed, 41 insertions(+)
> 
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index 1c62ee294a5a..1ea73402d592 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -332,6 +332,23 @@ static inline bool xfs_inode_has_atomicwrites(struct xfs_inode *ip)
>  	return ip->i_diflags2 & XFS_DIFLAG2_ATOMICWRITES;
>  }
>  
> +static inline bool
> +xfs_inode_can_atomicwrite(
> +	struct xfs_inode	*ip)
> +{
> +	struct xfs_mount	*mp = ip->i_mount;
> +	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
> +
> +	if (!xfs_inode_has_atomicwrites(ip))
> +		return false;
> +	if (mp->m_sb.sb_blocksize < target->bt_bdev_awu_min)
> +		return false;
> +	if (mp->m_sb.sb_blocksize > target->bt_bdev_awu_max)
> +		return false;
> +
> +	return true;
> +}
> +
>  /*
>   * In-core inode flags.
>   */
> diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> index ee79cf161312..915d057db9bb 100644
> --- a/fs/xfs/xfs_iops.c
> +++ b/fs/xfs/xfs_iops.c
> @@ -570,6 +570,23 @@ xfs_stat_blksize(
>  	return max_t(uint32_t, PAGE_SIZE, mp->m_sb.sb_blocksize);
>  }
>  
> +static void
> +xfs_get_atomic_write_attr(
> +	struct xfs_inode	*ip,
> +	unsigned int		*unit_min,
> +	unsigned int		*unit_max)
> +{
> +	struct xfs_mount	*mp = ip->i_mount;
> +	struct xfs_sb		*sbp = &mp->m_sb;
> +
> +	if (!xfs_inode_can_atomicwrite(ip)) {
> +		*unit_min = *unit_max = 0;
> +		return;
> +	}
> +
> +	*unit_min = *unit_max = sbp->sb_blocksize;

Ok, so we're only supporting untorn writes if they're exactly the fs
blocksize, and 1 fsblock is between awu_min/max.  That simplifies a lot
of things. :)

Not supporting sub-fsblock atomic writes means that we'll never hit the
directio COW fallback code, which uses the pagecache.

Not supporting multi-fsblock atomic writes means that you don't have to
figure out how to ensure that we always do cow on forcealign
granularity.  Though as I pointed out elsewhere in this thread, that's a
forcealign problem.

Yay! ;)

> +}
> +
>  STATIC int
>  xfs_vn_getattr(
>  	struct mnt_idmap	*idmap,
> @@ -643,6 +660,13 @@ xfs_vn_getattr(
>  			stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
>  			stat->dio_offset_align = bdev_logical_block_size(bdev);
>  		}
> +		if (request_mask & STATX_WRITE_ATOMIC) {
> +			unsigned int unit_min, unit_max;
> +
> +			xfs_get_atomic_write_attr(ip, &unit_min, &unit_max);
> +			generic_fill_statx_atomic_writes(stat,
> +				unit_min, unit_max);

Consistent indenting and wrapping, please:

			xfs_get_atomic_write_attr(ip, &unit_min,
					&unit_max);
			generic_fill_statx_atomic_writes(stat,
					unit_min, unit_max);


With that fixed,
Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D


> +		}
>  		fallthrough;
>  	default:
>  		stat->blksize = xfs_stat_blksize(ip);
> -- 
> 2.31.1
> 
>
John Garry Oct. 1, 2024, 8:29 a.m. UTC | #2
On 30/09/2024 17:37, Darrick J. Wong wrote:
> On Mon, Sep 30, 2024 at 12:54:36PM +0000, John Garry wrote:
>> Support providing info on atomic write unit min and max for an inode.
>>
>> For simplicity, currently we limit the min at the FS block size. As for
>> max, we limit also at FS block size, as there is no current method to
>> guarantee extent alignment or granularity for regular files.
>>
>> Signed-off-by: John Garry <john.g.garry@oracle.com>
>> ---
>>   fs/xfs/xfs_inode.h | 17 +++++++++++++++++
>>   fs/xfs/xfs_iops.c  | 24 ++++++++++++++++++++++++
>>   2 files changed, 41 insertions(+)
>>
>> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
>> index 1c62ee294a5a..1ea73402d592 100644
>> --- a/fs/xfs/xfs_inode.h
>> +++ b/fs/xfs/xfs_inode.h
>> @@ -332,6 +332,23 @@ static inline bool xfs_inode_has_atomicwrites(struct xfs_inode *ip)
>>   	return ip->i_diflags2 & XFS_DIFLAG2_ATOMICWRITES;
>>   }
>>   
>> +static inline bool
>> +xfs_inode_can_atomicwrite(
>> +	struct xfs_inode	*ip)
>> +{
>> +	struct xfs_mount	*mp = ip->i_mount;
>> +	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
>> +
>> +	if (!xfs_inode_has_atomicwrites(ip))
>> +		return false;
>> +	if (mp->m_sb.sb_blocksize < target->bt_bdev_awu_min)
>> +		return false;
>> +	if (mp->m_sb.sb_blocksize > target->bt_bdev_awu_max)
>> +		return false;
>> +
>> +	return true;
>> +}
>> +
>>   /*
>>    * In-core inode flags.
>>    */
>> diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
>> index ee79cf161312..915d057db9bb 100644
>> --- a/fs/xfs/xfs_iops.c
>> +++ b/fs/xfs/xfs_iops.c
>> @@ -570,6 +570,23 @@ xfs_stat_blksize(
>>   	return max_t(uint32_t, PAGE_SIZE, mp->m_sb.sb_blocksize);
>>   }
>>   
>> +static void
>> +xfs_get_atomic_write_attr(
>> +	struct xfs_inode	*ip,
>> +	unsigned int		*unit_min,
>> +	unsigned int		*unit_max)
>> +{
>> +	struct xfs_mount	*mp = ip->i_mount;
>> +	struct xfs_sb		*sbp = &mp->m_sb;
>> +
>> +	if (!xfs_inode_can_atomicwrite(ip)) {
>> +		*unit_min = *unit_max = 0;
>> +		return;
>> +	}
>> +
>> +	*unit_min = *unit_max = sbp->sb_blocksize;
> 
> Ok, so we're only supporting untorn writes if they're exactly the fs
> blocksize, and 1 fsblock is between awu_min/max.  That simplifies a lot
> of things. :)
> 
> Not supporting sub-fsblock atomic writes means that we'll never hit the
> directio COW fallback code, which uses the pagecache.

My original idea (with forcealign) was to support 1FSB and larger.

I suppose that with a larger FS block size we might want to support 
sub-fsblock atomic writes. However, for the moment, I don't see a need 
to support this.

> 
> Not supporting multi-fsblock atomic writes means that you don't have to
> figure out how to ensure that we always do cow on forcealign
> granularity.  Though as I pointed out elsewhere in this thread, that's a
> forcealign problem.

Sure

> 
> Yay! ;)
> 
>> +}
>> +
>>   STATIC int
>>   xfs_vn_getattr(
>>   	struct mnt_idmap	*idmap,
>> @@ -643,6 +660,13 @@ xfs_vn_getattr(
>>   			stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
>>   			stat->dio_offset_align = bdev_logical_block_size(bdev);
>>   		}
>> +		if (request_mask & STATX_WRITE_ATOMIC) {
>> +			unsigned int unit_min, unit_max;
>> +
>> +			xfs_get_atomic_write_attr(ip, &unit_min, &unit_max);
>> +			generic_fill_statx_atomic_writes(stat,
>> +				unit_min, unit_max);
> 
> Consistent indenting and wrapping, please:

ok

> 
> 			xfs_get_atomic_write_attr(ip, &unit_min,
> 					&unit_max);
> 			generic_fill_statx_atomic_writes(stat,
> 					unit_min, unit_max);
> 
> 
> With that fixed,
> Reviewed-by: Darrick J. Wong <djwong@kernel.org>

ok, thanks!
Christoph Hellwig Oct. 1, 2024, 8:43 a.m. UTC | #3
On Mon, Sep 30, 2024 at 09:37:16AM -0700, Darrick J. Wong wrote:
> Ok, so we're only supporting untorn writes if they're exactly the fs
> blocksize, and 1 fsblock is between awu_min/max.  That simplifies a lot
> of things. :)
> 
> Not supporting sub-fsblock atomic writes means that we'll never hit the
> directio COW fallback code, which uses the pagecache.
> 
> Not supporting multi-fsblock atomic writes means that you don't have to
> figure out how to ensure that we always do cow on forcealign
> granularity.  Though as I pointed out elsewhere in this thread, that's a
> forcealign problem.

It does simplify things a lot, and is probably a good idea for
the initial version.  But I suspect support for atomic writes
smaller than the block size will be really useful and we should
eventually support them, but maybe now on reflinked files or
alwayscow.
diff mbox series

Patch

diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 1c62ee294a5a..1ea73402d592 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -332,6 +332,23 @@  static inline bool xfs_inode_has_atomicwrites(struct xfs_inode *ip)
 	return ip->i_diflags2 & XFS_DIFLAG2_ATOMICWRITES;
 }
 
+static inline bool
+xfs_inode_can_atomicwrite(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
+
+	if (!xfs_inode_has_atomicwrites(ip))
+		return false;
+	if (mp->m_sb.sb_blocksize < target->bt_bdev_awu_min)
+		return false;
+	if (mp->m_sb.sb_blocksize > target->bt_bdev_awu_max)
+		return false;
+
+	return true;
+}
+
 /*
  * In-core inode flags.
  */
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ee79cf161312..915d057db9bb 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -570,6 +570,23 @@  xfs_stat_blksize(
 	return max_t(uint32_t, PAGE_SIZE, mp->m_sb.sb_blocksize);
 }
 
+static void
+xfs_get_atomic_write_attr(
+	struct xfs_inode	*ip,
+	unsigned int		*unit_min,
+	unsigned int		*unit_max)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_sb		*sbp = &mp->m_sb;
+
+	if (!xfs_inode_can_atomicwrite(ip)) {
+		*unit_min = *unit_max = 0;
+		return;
+	}
+
+	*unit_min = *unit_max = sbp->sb_blocksize;
+}
+
 STATIC int
 xfs_vn_getattr(
 	struct mnt_idmap	*idmap,
@@ -643,6 +660,13 @@  xfs_vn_getattr(
 			stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
 			stat->dio_offset_align = bdev_logical_block_size(bdev);
 		}
+		if (request_mask & STATX_WRITE_ATOMIC) {
+			unsigned int unit_min, unit_max;
+
+			xfs_get_atomic_write_attr(ip, &unit_min, &unit_max);
+			generic_fill_statx_atomic_writes(stat,
+				unit_min, unit_max);
+		}
 		fallthrough;
 	default:
 		stat->blksize = xfs_stat_blksize(ip);