Message ID | 20241015090142.3189518-6-john.g.garry@oracle.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | block atomic writes for xfs | expand |
On Tue, Oct 15, 2024 at 09:01:40AM +0000, John Garry wrote: > Support providing info on atomic write unit min and max for an inode. > > For simplicity, currently we limit the min at the FS block size. As for > max, we limit also at FS block size, as there is no current method to > guarantee extent alignment or granularity for regular files. > > Reviewed-by: "Darrick J. Wong" <djwong@kernel.org> > Signed-off-by: John Garry <john.g.garry@oracle.com> > --- > fs/xfs/xfs_buf.c | 7 +++++++ > fs/xfs/xfs_buf.h | 3 +++ > fs/xfs/xfs_inode.h | 15 +++++++++++++++ > fs/xfs/xfs_iops.c | 25 +++++++++++++++++++++++++ > 4 files changed, 50 insertions(+) > > diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c > index aa4dbda7b536..e279e5e139ff 100644 > --- a/fs/xfs/xfs_buf.c > +++ b/fs/xfs/xfs_buf.c > @@ -2115,6 +2115,13 @@ xfs_alloc_buftarg( > btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off, > mp, ops); > > + if (bdev_can_atomic_write(btp->bt_bdev)) { > + struct request_queue *q = bdev_get_queue(btp->bt_bdev); > + > + btp->bt_bdev_awu_min = queue_atomic_write_unit_min_bytes(q); > + btp->bt_bdev_awu_max = queue_atomic_write_unit_max_bytes(q); Consumers of the block layer should never see request_queue. While there is a few leftovers still I've cleaned most of this up. Please add bdev_atomic_write_unit_min_bytes and bdev_atomic_write_unit_max_bytes helpers for use by file systems and stacking drivers, similar to the other queue limits. > + /* Atomic write unit values */ > + unsigned int bt_bdev_awu_min, bt_bdev_awu_max; Nit: While having two struct members declare on the same line using the same type specification is perfectly valid C, it looks odd and we avoid it in XFS (and most of the kernel). Please split this into two lines. > + struct xfs_mount *mp = ip->i_mount; > + struct xfs_sb *sbp = &mp->m_sb; > + > + if (!xfs_inode_can_atomicwrite(ip)) { > + *unit_min = *unit_max = 0; > + return; > + } > + > + *unit_min = *unit_max = sbp->sb_blocksize; Nit: I'd do with the single use sbp local variable here. > +} > + > STATIC int > xfs_vn_getattr( > struct mnt_idmap *idmap, > @@ -643,6 +660,14 @@ xfs_vn_getattr( > stat->dio_mem_align = bdev_dma_alignment(bdev) + 1; > stat->dio_offset_align = bdev_logical_block_size(bdev); > } > + if (request_mask & STATX_WRITE_ATOMIC) { > + unsigned int unit_min, unit_max; Nit: XFS (unlike the rest of the kernel) uses tab alignments for variables. Otherwise this looks good: Reviewed-by: Christoph Hellwig <hch@lst.de>
On 15/10/2024 13:15, Christoph Hellwig wrote: > On Tue, Oct 15, 2024 at 09:01:40AM +0000, John Garry wrote: >> Support providing info on atomic write unit min and max for an inode. >> >> For simplicity, currently we limit the min at the FS block size. As for >> max, we limit also at FS block size, as there is no current method to >> guarantee extent alignment or granularity for regular files. >> >> Reviewed-by: "Darrick J. Wong" <djwong@kernel.org> >> Signed-off-by: John Garry <john.g.garry@oracle.com> >> --- >> fs/xfs/xfs_buf.c | 7 +++++++ >> fs/xfs/xfs_buf.h | 3 +++ >> fs/xfs/xfs_inode.h | 15 +++++++++++++++ >> fs/xfs/xfs_iops.c | 25 +++++++++++++++++++++++++ >> 4 files changed, 50 insertions(+) >> >> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c >> index aa4dbda7b536..e279e5e139ff 100644 >> --- a/fs/xfs/xfs_buf.c >> +++ b/fs/xfs/xfs_buf.c >> @@ -2115,6 +2115,13 @@ xfs_alloc_buftarg( >> btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off, >> mp, ops); >> >> + if (bdev_can_atomic_write(btp->bt_bdev)) { >> + struct request_queue *q = bdev_get_queue(btp->bt_bdev); >> + >> + btp->bt_bdev_awu_min = queue_atomic_write_unit_min_bytes(q); >> + btp->bt_bdev_awu_max = queue_atomic_write_unit_max_bytes(q); > > Consumers of the block layer should never see request_queue. While there > is a few leftovers still I've cleaned most of this up. Please add > bdev_atomic_write_unit_min_bytes and bdev_atomic_write_unit_max_bytes > helpers for use by file systems and stacking drivers, similar to the > other queue limits. ok, fine > >> + /* Atomic write unit values */ >> + unsigned int bt_bdev_awu_min, bt_bdev_awu_max; > > Nit: While having two struct members declare on the same line using the > same type specification is perfectly valid C, it looks odd and we avoid > it in XFS (and most of the kernel). Please split this into two lines. sure > >> + struct xfs_mount *mp = ip->i_mount; >> + struct xfs_sb *sbp = &mp->m_sb; >> + >> + if (!xfs_inode_can_atomicwrite(ip)) { >> + *unit_min = *unit_max = 0; >> + return; >> + } >> + >> + *unit_min = *unit_max = sbp->sb_blocksize; > > Nit: I'd do with the single use sbp local variable here. I think that you mean do without. > >> +} >> + >> STATIC int >> xfs_vn_getattr( >> struct mnt_idmap *idmap, >> @@ -643,6 +660,14 @@ xfs_vn_getattr( >> stat->dio_mem_align = bdev_dma_alignment(bdev) + 1; >> stat->dio_offset_align = bdev_logical_block_size(bdev); >> } >> + if (request_mask & STATX_WRITE_ATOMIC) { >> + unsigned int unit_min, unit_max; > > Nit: XFS (unlike the rest of the kernel) uses tab alignments for > variables. ok > > Otherwise this looks good: > > Reviewed-by: Christoph Hellwig <hch@lst.de> > cheers
On Tue, Oct 15, 2024 at 01:22:27PM +0100, John Garry wrote: >> Nit: I'd do with the single use sbp local variable here. > > I think that you mean do without. Yes, sorry.
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index aa4dbda7b536..e279e5e139ff 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -2115,6 +2115,13 @@ xfs_alloc_buftarg( btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off, mp, ops); + if (bdev_can_atomic_write(btp->bt_bdev)) { + struct request_queue *q = bdev_get_queue(btp->bt_bdev); + + btp->bt_bdev_awu_min = queue_atomic_write_unit_min_bytes(q); + btp->bt_bdev_awu_max = queue_atomic_write_unit_max_bytes(q); + } + /* * When allocating the buftargs we have not yet read the super block and * thus don't know the file system sector size yet. diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 209a389f2abc..2be28bd01087 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -124,6 +124,9 @@ struct xfs_buftarg { struct percpu_counter bt_io_count; struct ratelimit_state bt_ioerror_rl; + /* Atomic write unit values */ + unsigned int bt_bdev_awu_min, bt_bdev_awu_max; + /* built-in cache, if we're not using the perag one */ struct xfs_buf_cache bt_cache[]; }; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 97ed912306fd..73009a25a119 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -327,6 +327,21 @@ static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip) (XFS_IS_REALTIME_INODE(ip) ? \ (ip)->i_mount->m_rtdev_targp : (ip)->i_mount->m_ddev_targp) +static inline bool +xfs_inode_can_atomicwrite( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_buftarg *target = xfs_inode_buftarg(ip); + + if (mp->m_sb.sb_blocksize < target->bt_bdev_awu_min) + return false; + if (mp->m_sb.sb_blocksize > target->bt_bdev_awu_max) + return false; + + return true; +} + /* * In-core inode flags. */ diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ee79cf161312..919fbcb4b72a 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -570,6 +570,23 @@ xfs_stat_blksize( return max_t(uint32_t, PAGE_SIZE, mp->m_sb.sb_blocksize); } +static void +xfs_get_atomic_write_attr( + struct xfs_inode *ip, + unsigned int *unit_min, + unsigned int *unit_max) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_sb *sbp = &mp->m_sb; + + if (!xfs_inode_can_atomicwrite(ip)) { + *unit_min = *unit_max = 0; + return; + } + + *unit_min = *unit_max = sbp->sb_blocksize; +} + STATIC int xfs_vn_getattr( struct mnt_idmap *idmap, @@ -643,6 +660,14 @@ xfs_vn_getattr( stat->dio_mem_align = bdev_dma_alignment(bdev) + 1; stat->dio_offset_align = bdev_logical_block_size(bdev); } + if (request_mask & STATX_WRITE_ATOMIC) { + unsigned int unit_min, unit_max; + + xfs_get_atomic_write_attr(ip, &unit_min, + &unit_max); + generic_fill_statx_atomic_writes(stat, + unit_min, unit_max); + } fallthrough; default: stat->blksize = xfs_stat_blksize(ip);