Message ID | 20250227180813.1553404-9-john.g.garry@oracle.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | large atomic writes for xfs with CoW | expand |
On Thu, Feb 27, 2025 at 06:08:09PM +0000, John Garry wrote: > In cases of an atomic write occurs for misaligned or discontiguous disk > blocks, we will use a CoW-based method to issue the atomic write. > > So, for that case, return -EAGAIN to request that the write be issued in > CoW atomic write mode. The dio write path should detect this, similar to > how misaligned regular DIO writes are handled. > > For normal HW-based mode, when the range which we are atomic writing to > covers a shared data extent, try to allocate a new CoW fork. However, if > we find that what we allocated does not meet atomic write requirements > in terms of length and alignment, then fallback on the CoW-based mode > for the atomic write. > > Signed-off-by: John Garry <john.g.garry@oracle.com> Looks reasonable, Reviewed-by: "Darrick J. Wong" <djwong@kernel.org> --D > --- > fs/xfs/xfs_iomap.c | 143 ++++++++++++++++++++++++++++++++++++++++++++- > fs/xfs/xfs_iomap.h | 1 + > 2 files changed, 142 insertions(+), 2 deletions(-) > > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c > index edfc038bf728..573108cbdc5c 100644 > --- a/fs/xfs/xfs_iomap.c > +++ b/fs/xfs/xfs_iomap.c > @@ -795,6 +795,23 @@ imap_spans_range( > return true; > } > > +static bool > +xfs_bmap_valid_for_atomic_write( > + struct xfs_bmbt_irec *imap, > + xfs_fileoff_t offset_fsb, > + xfs_fileoff_t end_fsb) > +{ > + /* Misaligned start block wrt size */ > + if (!IS_ALIGNED(imap->br_startblock, imap->br_blockcount)) > + return false; > + > + /* Discontiguous or mixed extents */ > + if (!imap_spans_range(imap, offset_fsb, end_fsb)) > + return false; > + > + return true; > +} > + > static int > xfs_direct_write_iomap_begin( > struct inode *inode, > @@ -809,10 +826,13 @@ xfs_direct_write_iomap_begin( > struct xfs_bmbt_irec imap, cmap; > xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); > xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length); > + xfs_fileoff_t orig_end_fsb = end_fsb; > + bool atomic_hw = flags & 
IOMAP_ATOMIC_HW; > int nimaps = 1, error = 0; > unsigned int reflink_flags = 0; > bool shared = false; > u16 iomap_flags = 0; > + bool needs_alloc; > unsigned int lockmode; > u64 seq; > > @@ -871,13 +891,37 @@ xfs_direct_write_iomap_begin( > &lockmode, reflink_flags); > if (error) > goto out_unlock; > - if (shared) > + if (shared) { > + if (atomic_hw && > + !xfs_bmap_valid_for_atomic_write(&cmap, > + offset_fsb, end_fsb)) { > + error = -EAGAIN; > + goto out_unlock; > + } > goto out_found_cow; > + } > end_fsb = imap.br_startoff + imap.br_blockcount; > length = XFS_FSB_TO_B(mp, end_fsb) - offset; > } > > - if (imap_needs_alloc(inode, flags, &imap, nimaps)) > + needs_alloc = imap_needs_alloc(inode, flags, &imap, nimaps); > + > + if (atomic_hw) { > + error = -EAGAIN; > + /* > + * Use CoW method for when we need to alloc > 1 block, > + * otherwise we might allocate less than what we need here and > + * have multiple mappings. > + */ > + if (needs_alloc && orig_end_fsb - offset_fsb > 1) > + goto out_unlock; > + > + if (!xfs_bmap_valid_for_atomic_write(&imap, offset_fsb, > + orig_end_fsb)) > + goto out_unlock; > + } > + > + if (needs_alloc) > goto allocate_blocks; > > /* > @@ -965,6 +1009,101 @@ const struct iomap_ops xfs_direct_write_iomap_ops = { > .iomap_begin = xfs_direct_write_iomap_begin, > }; > > +static int > +xfs_atomic_write_sw_iomap_begin( > + struct inode *inode, > + loff_t offset, > + loff_t length, > + unsigned flags, > + struct iomap *iomap, > + struct iomap *srcmap) > +{ > + struct xfs_inode *ip = XFS_I(inode); > + struct xfs_mount *mp = ip->i_mount; > + struct xfs_bmbt_irec imap, cmap; > + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); > + xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length); > + int nimaps = 1, error; > + unsigned int reflink_flags; > + bool shared = false; > + u16 iomap_flags = 0; > + unsigned int lockmode = XFS_ILOCK_EXCL; > + u64 seq; > + > + if (xfs_is_shutdown(mp)) > + return -EIO; > + > + reflink_flags = 
XFS_REFLINK_CONVERT | XFS_REFLINK_ATOMIC_SW; > + > + /* > + * Set IOMAP_F_DIRTY similar to xfs_atomic_write_iomap_begin() > + */ > + if (offset + length > i_size_read(inode)) > + iomap_flags |= IOMAP_F_DIRTY; > + > + error = xfs_ilock_for_iomap(ip, flags, &lockmode); > + if (error) > + return error; > + > + error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, > + &nimaps, 0); > + if (error) > + goto out_unlock; > + > + error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared, > + &lockmode, reflink_flags); > + /* > + * Don't check @shared. For atomic writes, we should error when > + * we don't get a COW mapping > + */ > + if (error) > + goto out_unlock; > + > + end_fsb = imap.br_startoff + imap.br_blockcount; > + > + length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount); > + trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap); > + if (imap.br_startblock != HOLESTARTBLOCK) { > + seq = xfs_iomap_inode_sequence(ip, 0); > + error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq); > + if (error) > + goto out_unlock; > + } > + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); > + xfs_iunlock(ip, lockmode); > + return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq); > + > +out_unlock: > + if (lockmode) > + xfs_iunlock(ip, lockmode); > + return error; > +} > + > +static int > +xfs_atomic_write_iomap_begin( > + struct inode *inode, > + loff_t offset, > + loff_t length, > + unsigned flags, > + struct iomap *iomap, > + struct iomap *srcmap) > +{ > + ASSERT(flags & IOMAP_WRITE); > + ASSERT(flags & IOMAP_DIRECT); > + > + if (flags & IOMAP_ATOMIC_SW) > + return xfs_atomic_write_sw_iomap_begin(inode, offset, length, > + flags, iomap, srcmap); > + > + ASSERT(flags & IOMAP_ATOMIC_HW); > + return xfs_direct_write_iomap_begin(inode, offset, length, flags, > + iomap, srcmap); > +} > + > +const struct iomap_ops xfs_atomic_write_iomap_ops = { > + .iomap_begin = xfs_atomic_write_iomap_begin, > +}; > + > static int > 
xfs_dax_write_iomap_end( > struct inode *inode, > diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h > index 8347268af727..b7fbbc909943 100644 > --- a/fs/xfs/xfs_iomap.h > +++ b/fs/xfs/xfs_iomap.h > @@ -53,5 +53,6 @@ extern const struct iomap_ops xfs_read_iomap_ops; > extern const struct iomap_ops xfs_seek_iomap_ops; > extern const struct iomap_ops xfs_xattr_iomap_ops; > extern const struct iomap_ops xfs_dax_write_iomap_ops; > +extern const struct iomap_ops xfs_atomic_write_iomap_ops; > > #endif /* __XFS_IOMAP_H__*/ > -- > 2.31.1 >
Hi John, kernel test robot noticed the following build warnings: [auto build test WARNING on xfs-linux/for-next] [also build test WARNING on tytso-ext4/dev linus/master v6.14-rc4] [cannot apply to brauner-vfs/vfs.all next-20250228] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/John-Garry/xfs-Pass-flags-to-xfs_reflink_allocate_cow/20250228-021818 base: https://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git for-next patch link: https://lore.kernel.org/r/20250227180813.1553404-9-john.g.garry%40oracle.com patch subject: [PATCH v3 08/12] xfs: Iomap SW-based atomic write support config: hexagon-randconfig-001-20250302 (https://download.01.org/0day-ci/archive/20250302/202503020355.fr9QWxQJ-lkp@intel.com/config) compiler: clang version 21.0.0git (https://github.com/llvm/llvm-project 14170b16028c087ca154878f5ed93d3089a965c6) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250302/202503020355.fr9QWxQJ-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202503020355.fr9QWxQJ-lkp@intel.com/ All warnings (new ones prefixed by >>): >> fs/xfs/xfs_iomap.c:1029:8: warning: variable 'iomap_flags' set but not used [-Wunused-but-set-variable] 1029 | u16 iomap_flags = 0; | ^ 1 warning generated. 
vim +/iomap_flags +1029 fs/xfs/xfs_iomap.c 1011 1012 static int 1013 xfs_atomic_write_sw_iomap_begin( 1014 struct inode *inode, 1015 loff_t offset, 1016 loff_t length, 1017 unsigned flags, 1018 struct iomap *iomap, 1019 struct iomap *srcmap) 1020 { 1021 struct xfs_inode *ip = XFS_I(inode); 1022 struct xfs_mount *mp = ip->i_mount; 1023 struct xfs_bmbt_irec imap, cmap; 1024 xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); 1025 xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length); 1026 int nimaps = 1, error; 1027 unsigned int reflink_flags; 1028 bool shared = false; > 1029 u16 iomap_flags = 0; 1030 unsigned int lockmode = XFS_ILOCK_EXCL; 1031 u64 seq; 1032 1033 if (xfs_is_shutdown(mp)) 1034 return -EIO; 1035 1036 reflink_flags = XFS_REFLINK_CONVERT | XFS_REFLINK_ATOMIC_SW; 1037 1038 /* 1039 * Set IOMAP_F_DIRTY similar to xfs_atomic_write_iomap_begin() 1040 */ 1041 if (offset + length > i_size_read(inode)) 1042 iomap_flags |= IOMAP_F_DIRTY; 1043 1044 error = xfs_ilock_for_iomap(ip, flags, &lockmode); 1045 if (error) 1046 return error; 1047 1048 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, 1049 &nimaps, 0); 1050 if (error) 1051 goto out_unlock; 1052 1053 error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared, 1054 &lockmode, reflink_flags); 1055 /* 1056 * Don't check @shared. 
For atomic writes, we should error when 1057 * we don't get a COW mapping 1058 */ 1059 if (error) 1060 goto out_unlock; 1061 1062 end_fsb = imap.br_startoff + imap.br_blockcount; 1063 1064 length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount); 1065 trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap); 1066 if (imap.br_startblock != HOLESTARTBLOCK) { 1067 seq = xfs_iomap_inode_sequence(ip, 0); 1068 error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq); 1069 if (error) 1070 goto out_unlock; 1071 } 1072 seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); 1073 xfs_iunlock(ip, lockmode); 1074 return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq); 1075 1076 out_unlock: 1077 if (lockmode) 1078 xfs_iunlock(ip, lockmode); 1079 return error; 1080 } 1081
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index edfc038bf728..573108cbdc5c 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -795,6 +795,23 @@ imap_spans_range( return true; } +static bool +xfs_bmap_valid_for_atomic_write( + struct xfs_bmbt_irec *imap, + xfs_fileoff_t offset_fsb, + xfs_fileoff_t end_fsb) +{ + /* Misaligned start block wrt size */ + if (!IS_ALIGNED(imap->br_startblock, imap->br_blockcount)) + return false; + + /* Discontiguous or mixed extents */ + if (!imap_spans_range(imap, offset_fsb, end_fsb)) + return false; + + return true; +} + static int xfs_direct_write_iomap_begin( struct inode *inode, @@ -809,10 +826,13 @@ xfs_direct_write_iomap_begin( struct xfs_bmbt_irec imap, cmap; xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length); + xfs_fileoff_t orig_end_fsb = end_fsb; + bool atomic_hw = flags & IOMAP_ATOMIC_HW; int nimaps = 1, error = 0; unsigned int reflink_flags = 0; bool shared = false; u16 iomap_flags = 0; + bool needs_alloc; unsigned int lockmode; u64 seq; @@ -871,13 +891,37 @@ xfs_direct_write_iomap_begin( &lockmode, reflink_flags); if (error) goto out_unlock; - if (shared) + if (shared) { + if (atomic_hw && + !xfs_bmap_valid_for_atomic_write(&cmap, + offset_fsb, end_fsb)) { + error = -EAGAIN; + goto out_unlock; + } goto out_found_cow; + } end_fsb = imap.br_startoff + imap.br_blockcount; length = XFS_FSB_TO_B(mp, end_fsb) - offset; } - if (imap_needs_alloc(inode, flags, &imap, nimaps)) + needs_alloc = imap_needs_alloc(inode, flags, &imap, nimaps); + + if (atomic_hw) { + error = -EAGAIN; + /* + * Use CoW method for when we need to alloc > 1 block, + * otherwise we might allocate less than what we need here and + * have multiple mappings. 
+ */ + if (needs_alloc && orig_end_fsb - offset_fsb > 1) + goto out_unlock; + + if (!xfs_bmap_valid_for_atomic_write(&imap, offset_fsb, + orig_end_fsb)) + goto out_unlock; + } + + if (needs_alloc) goto allocate_blocks; /* @@ -965,6 +1009,101 @@ const struct iomap_ops xfs_direct_write_iomap_ops = { .iomap_begin = xfs_direct_write_iomap_begin, }; +static int +xfs_atomic_write_sw_iomap_begin( + struct inode *inode, + loff_t offset, + loff_t length, + unsigned flags, + struct iomap *iomap, + struct iomap *srcmap) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + struct xfs_bmbt_irec imap, cmap; + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); + xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length); + int nimaps = 1, error; + unsigned int reflink_flags; + bool shared = false; + u16 iomap_flags = 0; + unsigned int lockmode = XFS_ILOCK_EXCL; + u64 seq; + + if (xfs_is_shutdown(mp)) + return -EIO; + + reflink_flags = XFS_REFLINK_CONVERT | XFS_REFLINK_ATOMIC_SW; + + /* + * Set IOMAP_F_DIRTY similar to xfs_atomic_write_iomap_begin() + */ + if (offset + length > i_size_read(inode)) + iomap_flags |= IOMAP_F_DIRTY; + + error = xfs_ilock_for_iomap(ip, flags, &lockmode); + if (error) + return error; + + error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, + &nimaps, 0); + if (error) + goto out_unlock; + + error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared, + &lockmode, reflink_flags); + /* + * Don't check @shared. 
For atomic writes, we should error when + * we don't get a COW mapping + */ + if (error) + goto out_unlock; + + end_fsb = imap.br_startoff + imap.br_blockcount; + + length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount); + trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap); + if (imap.br_startblock != HOLESTARTBLOCK) { + seq = xfs_iomap_inode_sequence(ip, 0); + error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq); + if (error) + goto out_unlock; + } + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); + xfs_iunlock(ip, lockmode); + return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq); + +out_unlock: + if (lockmode) + xfs_iunlock(ip, lockmode); + return error; +} + +static int +xfs_atomic_write_iomap_begin( + struct inode *inode, + loff_t offset, + loff_t length, + unsigned flags, + struct iomap *iomap, + struct iomap *srcmap) +{ + ASSERT(flags & IOMAP_WRITE); + ASSERT(flags & IOMAP_DIRECT); + + if (flags & IOMAP_ATOMIC_SW) + return xfs_atomic_write_sw_iomap_begin(inode, offset, length, + flags, iomap, srcmap); + + ASSERT(flags & IOMAP_ATOMIC_HW); + return xfs_direct_write_iomap_begin(inode, offset, length, flags, + iomap, srcmap); +} + +const struct iomap_ops xfs_atomic_write_iomap_ops = { + .iomap_begin = xfs_atomic_write_iomap_begin, +}; + static int xfs_dax_write_iomap_end( struct inode *inode, diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 8347268af727..b7fbbc909943 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -53,5 +53,6 @@ extern const struct iomap_ops xfs_read_iomap_ops; extern const struct iomap_ops xfs_seek_iomap_ops; extern const struct iomap_ops xfs_xattr_iomap_ops; extern const struct iomap_ops xfs_dax_write_iomap_ops; +extern const struct iomap_ops xfs_atomic_write_iomap_ops; #endif /* __XFS_IOMAP_H__*/
In cases where an atomic write occurs for misaligned or discontiguous disk blocks, we will use a CoW-based method to issue the atomic write. So, for that case, return -EAGAIN to request that the write be issued in CoW atomic write mode. The dio write path should detect this, similar to how misaligned regular DIO writes are handled. For normal HW-based mode, when the range which we are atomic writing to covers a shared data extent, try to allocate a new CoW fork. However, if we find that what we allocated does not meet atomic write requirements in terms of length and alignment, then fall back on the CoW-based mode for the atomic write. Signed-off-by: John Garry <john.g.garry@oracle.com> --- fs/xfs/xfs_iomap.c | 143 ++++++++++++++++++++++++++++++++++++++++++++- fs/xfs/xfs_iomap.h | 1 + 2 files changed, 142 insertions(+), 2 deletions(-)