Message ID | 20210309050124.23797-6-chandanrlinux@gmail.com (mailing list archive) |
---|---|
State | Accepted, archived |
Headers | show |
Series | xfs: Tests to verify inode fork extent count overflow detection | expand |
On 3/8/21 10:01 PM, Chandan Babu R wrote: > Verify that XFS does not cause realtime bitmap/summary inode fork's > extent count to overflow when growing the realtime volume associated > with a filesystem. > > Reviewed-by: Darrick J. Wong <djwong@kernel.org> > Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> Ok, makes sense Reviewed-by: Allison Henderson <allison.henderson@oracle.com> > --- > tests/xfs/529 | 124 ++++++++++++++++++++++++++++++++++++++++++++++ > tests/xfs/529.out | 11 ++++ > tests/xfs/group | 1 + > 3 files changed, 136 insertions(+) > create mode 100755 tests/xfs/529 > create mode 100644 tests/xfs/529.out > > diff --git a/tests/xfs/529 b/tests/xfs/529 > new file mode 100755 > index 00000000..dd7019f5 > --- /dev/null > +++ b/tests/xfs/529 > @@ -0,0 +1,124 @@ > +#! /bin/bash > +# SPDX-License-Identifier: GPL-2.0 > +# Copyright (c) 2021 Chandan Babu R. All Rights Reserved. > +# > +# FS QA Test 529 > +# > +# Verify that XFS does not cause bitmap/summary inode fork's extent count to > +# overflow when growing an the realtime volume of the filesystem. > +# > +seq=`basename $0` > +seqres=$RESULT_DIR/$seq > +echo "QA output created by $seq" > + > +here=`pwd` > +tmp=/tmp/$$ > +status=1 # failure is the default! > +trap "_cleanup; exit \$status" 0 1 2 3 15 > + > +_cleanup() > +{ > + cd / > + _scratch_unmount >> $seqres.full 2>&1 > + test -e "$rtdev" && losetup -d $rtdev >> $seqres.full 2>&1 > + rm -f $tmp.* $TEST_DIR/$seq.rtvol > +} > + > +# get standard environment, filters and checks > +. ./common/rc > +. ./common/filter > +. ./common/inject > +. ./common/populate > + > +# remove previous $seqres.full before test > +rm -f $seqres.full > + > +# real QA test starts here > + > +_supported_fs xfs > +# Note that we don't _require_realtime because we synthesize a rt volume > +# below. 
> +_require_test > +_require_xfs_debug > +_require_test_program "punch-alternating" > +_require_xfs_io_error_injection "reduce_max_iextents" > +_require_xfs_io_error_injection "bmap_alloc_minlen_extent" > +_require_scratch_nocheck > + > +echo "* Test extending rt inodes" > + > +_scratch_mkfs | _filter_mkfs >> $seqres.full 2> $tmp.mkfs > +. $tmp.mkfs > + > +echo "Create fake rt volume" > +nr_bitmap_blks=25 > +nr_bits=$((nr_bitmap_blks * dbsize * 8)) > + > +# Realtime extent size has to be atleast 4k in size. > +if (( $dbsize < 4096 )); then > + rtextsz=4096 > +else > + rtextsz=$dbsize > +fi > + > +rtdevsz=$((nr_bits * rtextsz)) > +truncate -s $rtdevsz $TEST_DIR/$seq.rtvol > +rtdev=$(_create_loop_device $TEST_DIR/$seq.rtvol) > + > +echo "Format and mount rt volume" > + > +export USE_EXTERNAL=yes > +export SCRATCH_RTDEV=$rtdev > +_scratch_mkfs -d size=$((1024 * 1024 * 1024)) -b size=${dbsize} \ > + -r size=${rtextsz},extsize=${rtextsz} >> $seqres.full > +_try_scratch_mount || _notrun "Couldn't mount fs with synthetic rt volume" > + > +echo "Consume free space" > +fillerdir=$SCRATCH_MNT/fillerdir > +nr_free_blks=$(stat -f -c '%f' $SCRATCH_MNT) > +nr_free_blks=$((nr_free_blks * 90 / 100)) > + > +_fill_fs $((dbsize * nr_free_blks)) $fillerdir $dbsize 0 >> $seqres.full 2>&1 > + > +echo "Create fragmented filesystem" > +for dentry in $(ls -1 $fillerdir/); do > + $here/src/punch-alternating $fillerdir/$dentry >> $seqres.full > +done > + > +echo "Inject reduce_max_iextents error tag" > +_scratch_inject_error reduce_max_iextents 1 > + > +echo "Inject bmap_alloc_minlen_extent error tag" > +_scratch_inject_error bmap_alloc_minlen_extent 1 > + > +echo "Grow realtime volume" > +$XFS_GROWFS_PROG -r $SCRATCH_MNT >> $seqres.full 2>&1 > +if [[ $? == 0 ]]; then > + echo "Growfs succeeded; should have failed." 
> + exit 1 > +fi > + > +_scratch_unmount >> $seqres.full > + > +echo "Verify rbmino's and rsumino's extent count" > +for rtino in rbmino rsumino; do > + ino=$(_scratch_xfs_get_metadata_field $rtino "sb 0") > + echo "$rtino = $ino" >> $seqres.full > + > + nextents=$(_scratch_get_iext_count $ino data || \ > + _fail "Unable to obtain inode fork's extent count") > + if (( $nextents > 10 )); then > + echo "Extent count overflow check failed: nextents = $nextents" > + exit 1 > + fi > +done > + > +echo "Check filesystem" > +_check_xfs_filesystem $SCRATCH_DEV none $rtdev > + > +losetup -d $rtdev > +rm -f $TEST_DIR/$seq.rtvol > + > +# success, all done > +status=0 > +exit > diff --git a/tests/xfs/529.out b/tests/xfs/529.out > new file mode 100644 > index 00000000..4ee113a4 > --- /dev/null > +++ b/tests/xfs/529.out > @@ -0,0 +1,11 @@ > +QA output created by 529 > +* Test extending rt inodes > +Create fake rt volume > +Format and mount rt volume > +Consume free space > +Create fragmented filesystem > +Inject reduce_max_iextents error tag > +Inject bmap_alloc_minlen_extent error tag > +Grow realtime volume > +Verify rbmino's and rsumino's extent count > +Check filesystem > diff --git a/tests/xfs/group b/tests/xfs/group > index 2356c4a9..5dff7acb 100644 > --- a/tests/xfs/group > +++ b/tests/xfs/group > @@ -526,3 +526,4 @@ > 526 auto quick mkfs > 527 auto quick quota > 528 auto quick quota > +529 auto quick realtime growfs >
On Tue, Mar 09, 2021 at 10:31:16AM +0530, Chandan Babu R wrote: > Verify that XFS does not cause realtime bitmap/summary inode fork's > extent count to overflow when growing the realtime volume associated > with a filesystem. > > Reviewed-by: Darrick J. Wong <djwong@kernel.org> > Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> Soo... I discovered that this test doesn't pass with multiblock directories: FSTYP -- xfs (debug) PLATFORM -- Linux/x86_64 alder-mtr00 5.12.0-rc4-xfsx #rc4 SMP PREEMPT Mon Mar 22 10:03:45 PDT 2021 MKFS_OPTIONS -- -f -b size=1024, /dev/sdf MOUNT_OPTIONS -- -o usrquota,grpquota,prjquota, /dev/sdf /opt xfs/529 - output mismatch (see /var/tmp/fstests/xfs/529.out.bad) --- tests/xfs/529.out 2021-03-21 11:44:09.383407733 -0700 +++ /var/tmp/fstests/xfs/529.out.bad 2021-03-22 10:36:34.000348426 -0700 @@ -4,12 +4,21 @@ Inject reduce_max_iextents error tag Create fragmented file Verify $testfile's extent count +/opt/testfile: No such file or directory +/tmp/fstests/tests/xfs/529: line 72: ((: > 10 : syntax error: operand expected (error token is "> 10 ") +rm: cannot remove '/opt/testfile': No such file or directory * Fallocate unwritten extents ... 
(Run 'diff -u /tmp/fstests/tests/xfs/529.out /var/tmp/fstests/xfs/529.out.bad' to see the entire diff) Ran: xfs/529 Failures: xfs/529 Failed 1 of 1 tests Test xfs/529 FAILED with code 1 and bad golden output: --- /tmp/fstests/tests/xfs/529.out 2021-03-21 11:44:09.383407733 -0700 +++ /var/tmp/fstests/xfs/529.out.bad 2021-03-22 10:36:34.000348426 -0700 @@ -4,12 +4,21 @@ Inject reduce_max_iextents error tag Create fragmented file Verify $testfile's extent count +/opt/testfile: No such file or directory +/tmp/fstests/tests/xfs/529: line 72: ((: > 10 : syntax error: operand expected (error token is "> 10 ") +rm: cannot remove '/opt/testfile': No such file or directory * Fallocate unwritten extents Fallocate fragmented file Verify $testfile's extent count +/opt/testfile: No such file or directory +/tmp/fstests/tests/xfs/529: line 91: ((: > 10 : syntax error: operand expected (error token is "> 10 ") +rm: cannot remove '/opt/testfile': No such file or directory * Directio write Create fragmented file via directio writes Verify $testfile's extent count +/opt/testfile: No such file or directory +/tmp/fstests/tests/xfs/529: line 110: ((: > 10 : syntax error: operand expected (error token is "> 10 ") +rm: cannot remove '/opt/testfile': No such file or directory * Extend quota inodes Disable reduce_max_iextents error tag Consume free space The test appears to fail because we cannot create even a single file in the root directory. 
Looking at xfs_create, I see: error = xfs_iext_count_may_overflow(dp, XFS_DATA_FORK, XFS_IEXT_DIR_MANIP_CNT(mp)); if (error) goto out_trans_cancel; XFS_IEXT_DIR_MANIP_CNT is defined as: #define XFS_IEXT_DIR_MANIP_CNT(mp) \ ((XFS_DA_NODE_MAXDEPTH + 1 + 1) * (mp)->m_dir_geo->fsbcount) If one formats a filesystem with 1k blocks, the result will be a filesystem with 4k directory blocks: # mkfs.xfs -b size=1024 /dev/sdf -Nf meta-data=/dev/sdf isize=512 agcount=4, agsize=5192704 blks = sectsz=512 attr=2, projid32bit=1 = crc=1 finobt=1, sparse=1, rmapbt=1 = reflink=1 bigtime=1 inobtcount=1 = metadir=0 data = bsize=1024 blocks=20770816, imaxpct=25 = sunit=0 swidth=0 blks naming =version 2 bsize=4096 ascii-ci=0, ftype=1 log =internal log bsize=1024 blocks=10240, version=2 = sectsz=512 sunit=0 blks, lazy-count=1 realtime =none extsz=4096 blocks=0, rtextents=0 Note "data bsize" is 1024, and "naming bsize" is 4096. In the kernel, we set m_dir_geo->fsbcount = "naming bsize" / "data bsize", or 4 in this case. Since XFS_DA_NODE_MAXDEPTH is always 5, this macro expands to: (5 + 1 + 1) * (4) = 28 I think the test fails because of this code in xfs_iext_count_may_overflow, which is called from xfs_create on the parent directory: if (XFS_TEST_ERROR(false, ip->i_mount, XFS_ERRTAG_REDUCE_MAX_IEXTENTS)) max_exts = 10; nr_exts = ifp->if_nextents + nr_to_add; if (nr_exts < ifp->if_nextents || nr_exts > max_exts) return -EFBIG; The second part of the if statement becomes (28 > 10) which is trivially true, so we return -EFBIG for all attempts to create a file in a directory. xfs/529, in turn, cannot create $testfile because nothing can create a file in $SCRATCH_MNT, and the test goes off the rails. I think this can be trivially solved by changing this (and the other tests) to ensure that the error injection is only set when we're running a command to check if we get EFBIG. 
In other words, this code in xfs/529: rm $testfile echo "* Fallocate unwritten extents" echo "Fallocate fragmented file" for i in $(seq 0 2 $((nr_blks - 1))); do $XFS_IO_PROG -f -c "falloc $((i * bsize)) $bsize" $testfile \ >> $seqres.full 2>&1 [[ $? != 0 ]] && break done Should become: rm -f $testfile touch $testfile echo "* Fallocate unwritten extents" echo "Fallocate fragmented file" _scratch_inject_error reduce_max_iextents 1 for i in $(seq 0 2 $((nr_blks - 1))); do $XFS_IO_PROG -c "falloc $((i * bsize)) $bsize" $testfile \ >> $seqres.full 2>&1 [[ $? != 0 ]] && break done _scratch_inject_error reduce_max_iextents 0 With that patched up, xfs/529 passes on 1k block filesystems. I suspect the other tests in this series (xfs/531, 532, 534, and 535) are going to need similar patching. --D > --- > tests/xfs/529 | 124 ++++++++++++++++++++++++++++++++++++++++++++++ > tests/xfs/529.out | 11 ++++ > tests/xfs/group | 1 + > 3 files changed, 136 insertions(+) > create mode 100755 tests/xfs/529 > create mode 100644 tests/xfs/529.out > > diff --git a/tests/xfs/529 b/tests/xfs/529 > new file mode 100755 > index 00000000..dd7019f5 > --- /dev/null > +++ b/tests/xfs/529 > @@ -0,0 +1,124 @@ > +#! /bin/bash > +# SPDX-License-Identifier: GPL-2.0 > +# Copyright (c) 2021 Chandan Babu R. All Rights Reserved. > +# > +# FS QA Test 529 > +# > +# Verify that XFS does not cause bitmap/summary inode fork's extent count to > +# overflow when growing an the realtime volume of the filesystem. > +# > +seq=`basename $0` > +seqres=$RESULT_DIR/$seq > +echo "QA output created by $seq" > + > +here=`pwd` > +tmp=/tmp/$$ > +status=1 # failure is the default! > +trap "_cleanup; exit \$status" 0 1 2 3 15 > + > +_cleanup() > +{ > + cd / > + _scratch_unmount >> $seqres.full 2>&1 > + test -e "$rtdev" && losetup -d $rtdev >> $seqres.full 2>&1 > + rm -f $tmp.* $TEST_DIR/$seq.rtvol > +} > + > +# get standard environment, filters and checks > +. ./common/rc > +. ./common/filter > +. ./common/inject > +. 
./common/populate > + > +# remove previous $seqres.full before test > +rm -f $seqres.full > + > +# real QA test starts here > + > +_supported_fs xfs > +# Note that we don't _require_realtime because we synthesize a rt volume > +# below. > +_require_test > +_require_xfs_debug > +_require_test_program "punch-alternating" > +_require_xfs_io_error_injection "reduce_max_iextents" > +_require_xfs_io_error_injection "bmap_alloc_minlen_extent" > +_require_scratch_nocheck > + > +echo "* Test extending rt inodes" > + > +_scratch_mkfs | _filter_mkfs >> $seqres.full 2> $tmp.mkfs > +. $tmp.mkfs > + > +echo "Create fake rt volume" > +nr_bitmap_blks=25 > +nr_bits=$((nr_bitmap_blks * dbsize * 8)) > + > +# Realtime extent size has to be atleast 4k in size. > +if (( $dbsize < 4096 )); then > + rtextsz=4096 > +else > + rtextsz=$dbsize > +fi > + > +rtdevsz=$((nr_bits * rtextsz)) > +truncate -s $rtdevsz $TEST_DIR/$seq.rtvol > +rtdev=$(_create_loop_device $TEST_DIR/$seq.rtvol) > + > +echo "Format and mount rt volume" > + > +export USE_EXTERNAL=yes > +export SCRATCH_RTDEV=$rtdev > +_scratch_mkfs -d size=$((1024 * 1024 * 1024)) -b size=${dbsize} \ > + -r size=${rtextsz},extsize=${rtextsz} >> $seqres.full > +_try_scratch_mount || _notrun "Couldn't mount fs with synthetic rt volume" > + > +echo "Consume free space" > +fillerdir=$SCRATCH_MNT/fillerdir > +nr_free_blks=$(stat -f -c '%f' $SCRATCH_MNT) > +nr_free_blks=$((nr_free_blks * 90 / 100)) > + > +_fill_fs $((dbsize * nr_free_blks)) $fillerdir $dbsize 0 >> $seqres.full 2>&1 > + > +echo "Create fragmented filesystem" > +for dentry in $(ls -1 $fillerdir/); do > + $here/src/punch-alternating $fillerdir/$dentry >> $seqres.full > +done > + > +echo "Inject reduce_max_iextents error tag" > +_scratch_inject_error reduce_max_iextents 1 > + > +echo "Inject bmap_alloc_minlen_extent error tag" > +_scratch_inject_error bmap_alloc_minlen_extent 1 > + > +echo "Grow realtime volume" > +$XFS_GROWFS_PROG -r $SCRATCH_MNT >> $seqres.full 2>&1 > +if [[ $? 
== 0 ]]; then > + echo "Growfs succeeded; should have failed." > + exit 1 > +fi > + > +_scratch_unmount >> $seqres.full > + > +echo "Verify rbmino's and rsumino's extent count" > +for rtino in rbmino rsumino; do > + ino=$(_scratch_xfs_get_metadata_field $rtino "sb 0") > + echo "$rtino = $ino" >> $seqres.full > + > + nextents=$(_scratch_get_iext_count $ino data || \ > + _fail "Unable to obtain inode fork's extent count") > + if (( $nextents > 10 )); then > + echo "Extent count overflow check failed: nextents = $nextents" > + exit 1 > + fi > +done > + > +echo "Check filesystem" > +_check_xfs_filesystem $SCRATCH_DEV none $rtdev > + > +losetup -d $rtdev > +rm -f $TEST_DIR/$seq.rtvol > + > +# success, all done > +status=0 > +exit > diff --git a/tests/xfs/529.out b/tests/xfs/529.out > new file mode 100644 > index 00000000..4ee113a4 > --- /dev/null > +++ b/tests/xfs/529.out > @@ -0,0 +1,11 @@ > +QA output created by 529 > +* Test extending rt inodes > +Create fake rt volume > +Format and mount rt volume > +Consume free space > +Create fragmented filesystem > +Inject reduce_max_iextents error tag > +Inject bmap_alloc_minlen_extent error tag > +Grow realtime volume > +Verify rbmino's and rsumino's extent count > +Check filesystem > diff --git a/tests/xfs/group b/tests/xfs/group > index 2356c4a9..5dff7acb 100644 > --- a/tests/xfs/group > +++ b/tests/xfs/group > @@ -526,3 +526,4 @@ > 526 auto quick mkfs > 527 auto quick quota > 528 auto quick quota > +529 auto quick realtime growfs > -- > 2.29.2 >
On 22 Mar 2021 at 23:26, Darrick J. Wong wrote: > On Tue, Mar 09, 2021 at 10:31:16AM +0530, Chandan Babu R wrote: >> Verify that XFS does not cause realtime bitmap/summary inode fork's >> extent count to overflow when growing the realtime volume associated >> with a filesystem. >> >> Reviewed-by: Darrick J. Wong <djwong@kernel.org> >> Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> > > Soo... I discovered that this test doesn't pass with multiblock > directories: Thanks for the bug report and the description of the corresponding solution. I am fixing the tests and will soon post corresponding patches to the mailing list. -- chandan
On Tue, Mar 23, 2021 at 09:21:27PM +0530, Chandan Babu R wrote: > On 22 Mar 2021 at 23:26, Darrick J. Wong wrote: > > On Tue, Mar 09, 2021 at 10:31:16AM +0530, Chandan Babu R wrote: > >> Verify that XFS does not cause realtime bitmap/summary inode fork's > >> extent count to overflow when growing the realtime volume associated > >> with a filesystem. > >> > >> Reviewed-by: Darrick J. Wong <djwong@kernel.org> > >> Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> > > > > Soo... I discovered that this test doesn't pass with multiblock > > directories: > > Thanks for the bug report and the description of the corresponding solution. I > am fixing the tests and will soon post corresponding patches to the mailing > list. Also, I found a problem with xfs/534 when it does the direct write tests to a pmem volume with DAX enabled: --- /tmp/fstests/tests/xfs/534.out 2021-03-21 11:44:09.384407426 -0700 +++ /var/tmp/fstests/xfs/534.out.bad 2021-03-23 13:32:15.898301839 -0700 @@ -5,7 +5,4 @@ Fallocate 15 blocks Buffered write to every other block of fallocated space Verify $testfile's extent count -* Direct write to unwritten extent -Fallocate 15 blocks -Direct write to every other block of fallocated space -Verify $testfile's extent count +Extent count overflow check failed: nextents = 11 looking at the xfs_bmap output for $testfile shows: /opt/testfile: EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL FLAGS 0: [0..7]: 208..215 0 (208..215) 8 010000 1: [8..15]: 216..223 0 (216..223) 8 000000 2: [16..23]: 224..231 0 (224..231) 8 010000 3: [24..31]: 232..239 0 (232..239) 8 000000 4: [32..39]: 240..247 0 (240..247) 8 010000 5: [40..47]: 248..255 0 (248..255) 8 000000 6: [48..55]: 256..263 0 (256..263) 8 010000 7: [56..63]: 264..271 0 (264..271) 8 000000 8: [64..71]: 272..279 0 (272..279) 8 010000 9: [72..79]: 280..287 0 (280..287) 8 000000 10: [80..119]: 288..327 0 (288..327) 40 010000 Which is ... 
odd since the same direct write gets cut off after writing to block 7 (like you'd expect since it's the same function) when DAX isn't enabled... ...OH, I see the problem. For a non-DAX direct write, xfs_iomap_write_direct will allocate an unwritten block into a hole, but if the block was already mapped (written or unwritten) it won't do anything at all. For that case, XFS_IEXT_ADD_NOSPLIT_CNT is sufficient, because in the worst case we add one extent to the data fork. For DAX writes, however, the behavior is different: if (IS_DAX(VFS_I(ip))) { bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO; if (imap->br_state == XFS_EXT_UNWRITTEN) { force = true; dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; } } This tells xfs_bmapi_write that we want to /convert/ an unwritten extent to written, and we want to zero the blocks. If we're dax-writing into the middle of an unwritten range, this will cause a split. The correct parameter there would be XFS_IEXT_WRITE_UNWRITTEN_CNT. Would you mind sending a kernel patch to fix that? --D > -- > chandan
On 24 Mar 2021 at 02:27, Darrick J. Wong wrote: > On Tue, Mar 23, 2021 at 09:21:27PM +0530, Chandan Babu R wrote: >> On 22 Mar 2021 at 23:26, Darrick J. Wong wrote: >> > On Tue, Mar 09, 2021 at 10:31:16AM +0530, Chandan Babu R wrote: >> >> Verify that XFS does not cause realtime bitmap/summary inode fork's >> >> extent count to overflow when growing the realtime volume associated >> >> with a filesystem. >> >> >> >> Reviewed-by: Darrick J. Wong <djwong@kernel.org> >> >> Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> >> > >> > Soo... I discovered that this test doesn't pass with multiblock >> > directories: >> >> Thanks for the bug report and the description of the corresponding solution. I >> am fixing the tests and will soon post corresponding patches to the mailing >> list. > > Also, I found a problem with xfs/534 when it does the direct write tests > to a pmem volume with DAX enabled: > > --- /tmp/fstests/tests/xfs/534.out 2021-03-21 11:44:09.384407426 -0700 > +++ /var/tmp/fstests/xfs/534.out.bad 2021-03-23 13:32:15.898301839 -0700 > @@ -5,7 +5,4 @@ > Fallocate 15 blocks > Buffered write to every other block of fallocated space > Verify $testfile's extent count > -* Direct write to unwritten extent > -Fallocate 15 blocks > -Direct write to every other block of fallocated space > -Verify $testfile's extent count > +Extent count overflow check failed: nextents = 11 The inode extent overflow reported above was actually due to the buffered write operation. But it does occur with direct write operation as well. I was able to recreate the bug with an emulated pmem device on my qemu guest. 
> > looking at the xfs_bmap output for $testfile shows: > > /opt/testfile: > EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL FLAGS > 0: [0..7]: 208..215 0 (208..215) 8 010000 > 1: [8..15]: 216..223 0 (216..223) 8 000000 > 2: [16..23]: 224..231 0 (224..231) 8 010000 > 3: [24..31]: 232..239 0 (232..239) 8 000000 > 4: [32..39]: 240..247 0 (240..247) 8 010000 > 5: [40..47]: 248..255 0 (248..255) 8 000000 > 6: [48..55]: 256..263 0 (256..263) 8 010000 > 7: [56..63]: 264..271 0 (264..271) 8 000000 > 8: [64..71]: 272..279 0 (272..279) 8 010000 > 9: [72..79]: 280..287 0 (280..287) 8 000000 > 10: [80..119]: 288..327 0 (288..327) 40 010000 > > Which is ... odd since the same direct write gets cut off after writing > to block 7 (like you'd expect since it's the same function) when DAX > isn't enabled... > > ...OH, I see the problem. For a non-DAX direct write, > xfs_iomap_write_direct will allocate an unwritten block into a hole, but > if the block was already mapped (written or unwritten) it won't do > anything at all. For that case, XFS_IEXT_ADD_NOSPLIT_CNT is sufficient, > because in the worst case we add one extent to the data fork. > > For DAX writes, however, the behavior is different: > > if (IS_DAX(VFS_I(ip))) { > bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO; > if (imap->br_state == XFS_EXT_UNWRITTEN) { > force = true; > dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; > } > } > > This tells xfs_bmapi_write that we want to /convert/ an unwritten extent > to written, and we want to zero the blocks. If we're dax-writing into > the middle of an unwritten range, this will cause a split. The correct > parameter there would be XFS_IEXT_WRITE_UNWRITTEN_CNT. Would you mind > sending a kernel patch to fix that? Sure, I will work on fixing both the buffered and direct IO extent overflow issues. Thanks for reporting the bug. -- chandan
On 24 Mar 2021 at 16:16, Chandan Babu R wrote: > On 24 Mar 2021 at 02:27, Darrick J. Wong wrote: >> On Tue, Mar 23, 2021 at 09:21:27PM +0530, Chandan Babu R wrote: >>> On 22 Mar 2021 at 23:26, Darrick J. Wong wrote: >>> > On Tue, Mar 09, 2021 at 10:31:16AM +0530, Chandan Babu R wrote: >>> >> Verify that XFS does not cause realtime bitmap/summary inode fork's >>> >> extent count to overflow when growing the realtime volume associated >>> >> with a filesystem. >>> >> >>> >> Reviewed-by: Darrick J. Wong <djwong@kernel.org> >>> >> Signed-off-by: Chandan Babu R <chandanrlinux@gmail.com> >>> > >>> > Soo... I discovered that this test doesn't pass with multiblock >>> > directories: >>> >>> Thanks for the bug report and the description of the corresponding solution. I >>> am fixing the tests and will soon post corresponding patches to the mailing >>> list. >> >> Also, I found a problem with xfs/534 when it does the direct write tests >> to a pmem volume with DAX enabled: >> >> --- /tmp/fstests/tests/xfs/534.out 2021-03-21 11:44:09.384407426 -0700 >> +++ /var/tmp/fstests/xfs/534.out.bad 2021-03-23 13:32:15.898301839 -0700 >> @@ -5,7 +5,4 @@ >> Fallocate 15 blocks >> Buffered write to every other block of fallocated space >> Verify $testfile's extent count >> -* Direct write to unwritten extent >> -Fallocate 15 blocks >> -Direct write to every other block of fallocated space >> -Verify $testfile's extent count >> +Extent count overflow check failed: nextents = 11 > > The inode extent overflow reported above was actually due to the buffered > write operation. But it does occur with direct write operation as well. I just found out that xfs_direct_write_iomap_ops is used for both buffered and direct I/O on DAX devices. Please ignore my statement quoted above. -- chandan
diff --git a/tests/xfs/529 b/tests/xfs/529 new file mode 100755 index 00000000..dd7019f5 --- /dev/null +++ b/tests/xfs/529 @@ -0,0 +1,124 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2021 Chandan Babu R. All Rights Reserved. +# +# FS QA Test 529 +# +# Verify that XFS does not cause bitmap/summary inode fork's extent count to +# overflow when growing an the realtime volume of the filesystem. +# +seq=`basename $0` +seqres=$RESULT_DIR/$seq +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! +trap "_cleanup; exit \$status" 0 1 2 3 15 + +_cleanup() +{ + cd / + _scratch_unmount >> $seqres.full 2>&1 + test -e "$rtdev" && losetup -d $rtdev >> $seqres.full 2>&1 + rm -f $tmp.* $TEST_DIR/$seq.rtvol +} + +# get standard environment, filters and checks +. ./common/rc +. ./common/filter +. ./common/inject +. ./common/populate + +# remove previous $seqres.full before test +rm -f $seqres.full + +# real QA test starts here + +_supported_fs xfs +# Note that we don't _require_realtime because we synthesize a rt volume +# below. +_require_test +_require_xfs_debug +_require_test_program "punch-alternating" +_require_xfs_io_error_injection "reduce_max_iextents" +_require_xfs_io_error_injection "bmap_alloc_minlen_extent" +_require_scratch_nocheck + +echo "* Test extending rt inodes" + +_scratch_mkfs | _filter_mkfs >> $seqres.full 2> $tmp.mkfs +. $tmp.mkfs + +echo "Create fake rt volume" +nr_bitmap_blks=25 +nr_bits=$((nr_bitmap_blks * dbsize * 8)) + +# Realtime extent size has to be atleast 4k in size. 
+if (( $dbsize < 4096 )); then + rtextsz=4096 +else + rtextsz=$dbsize +fi + +rtdevsz=$((nr_bits * rtextsz)) +truncate -s $rtdevsz $TEST_DIR/$seq.rtvol +rtdev=$(_create_loop_device $TEST_DIR/$seq.rtvol) + +echo "Format and mount rt volume" + +export USE_EXTERNAL=yes +export SCRATCH_RTDEV=$rtdev +_scratch_mkfs -d size=$((1024 * 1024 * 1024)) -b size=${dbsize} \ + -r size=${rtextsz},extsize=${rtextsz} >> $seqres.full +_try_scratch_mount || _notrun "Couldn't mount fs with synthetic rt volume" + +echo "Consume free space" +fillerdir=$SCRATCH_MNT/fillerdir +nr_free_blks=$(stat -f -c '%f' $SCRATCH_MNT) +nr_free_blks=$((nr_free_blks * 90 / 100)) + +_fill_fs $((dbsize * nr_free_blks)) $fillerdir $dbsize 0 >> $seqres.full 2>&1 + +echo "Create fragmented filesystem" +for dentry in $(ls -1 $fillerdir/); do + $here/src/punch-alternating $fillerdir/$dentry >> $seqres.full +done + +echo "Inject reduce_max_iextents error tag" +_scratch_inject_error reduce_max_iextents 1 + +echo "Inject bmap_alloc_minlen_extent error tag" +_scratch_inject_error bmap_alloc_minlen_extent 1 + +echo "Grow realtime volume" +$XFS_GROWFS_PROG -r $SCRATCH_MNT >> $seqres.full 2>&1 +if [[ $? == 0 ]]; then + echo "Growfs succeeded; should have failed." 
+ exit 1 +fi + +_scratch_unmount >> $seqres.full + +echo "Verify rbmino's and rsumino's extent count" +for rtino in rbmino rsumino; do + ino=$(_scratch_xfs_get_metadata_field $rtino "sb 0") + echo "$rtino = $ino" >> $seqres.full + + nextents=$(_scratch_get_iext_count $ino data || \ + _fail "Unable to obtain inode fork's extent count") + if (( $nextents > 10 )); then + echo "Extent count overflow check failed: nextents = $nextents" + exit 1 + fi +done + +echo "Check filesystem" +_check_xfs_filesystem $SCRATCH_DEV none $rtdev + +losetup -d $rtdev +rm -f $TEST_DIR/$seq.rtvol + +# success, all done +status=0 +exit diff --git a/tests/xfs/529.out b/tests/xfs/529.out new file mode 100644 index 00000000..4ee113a4 --- /dev/null +++ b/tests/xfs/529.out @@ -0,0 +1,11 @@ +QA output created by 529 +* Test extending rt inodes +Create fake rt volume +Format and mount rt volume +Consume free space +Create fragmented filesystem +Inject reduce_max_iextents error tag +Inject bmap_alloc_minlen_extent error tag +Grow realtime volume +Verify rbmino's and rsumino's extent count +Check filesystem diff --git a/tests/xfs/group b/tests/xfs/group index 2356c4a9..5dff7acb 100644 --- a/tests/xfs/group +++ b/tests/xfs/group @@ -526,3 +526,4 @@ 526 auto quick mkfs 527 auto quick quota 528 auto quick quota +529 auto quick realtime growfs