diff mbox series

generic: add gc stress test

Message ID 20240415112259.21760-1-hans.holmberg@wdc.com (mailing list archive)
State New, archived
Headers show
Series generic: add gc stress test | expand

Commit Message

Hans Holmberg April 15, 2024, 11:23 a.m. UTC
This test stresses garbage collection for file systems by first filling
up a scratch mount to a specific usage point with files of random size,
then doing overwrites in parallel with deletes to fragment the backing
storage, forcing reclaim.

Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
---

Test results in my setup (kernel 6.8.0-rc4+)
	f2fs on zoned nullblk: pass (77s)
	f2fs on conventional nvme ssd: pass (13s)
	btrfs on zoned nullblk: fails (-ENOSPC)
	btrfs on conventional nvme ssd: fails (-ENOSPC)
	xfs on conventional nvme ssd: pass (8s)

Johannes(cc) is working on the btrfs ENOSPC issue.
	 
 tests/generic/744     | 124 ++++++++++++++++++++++++++++++++++++++++++
 tests/generic/744.out |   6 ++
 2 files changed, 130 insertions(+)
 create mode 100755 tests/generic/744
 create mode 100644 tests/generic/744.out

Comments

Hans Holmberg April 16, 2024, 9:07 a.m. UTC | #1
+Zorro (doh!)

On 2024-04-15 13:23, Hans Holmberg wrote:
> This test stresses garbage collection for file systems by first filling
> up a scratch mount to a specific usage point with files of random size,
> then doing overwrites in parallel with deletes to fragment the backing
> storage, forcing reclaim.
> 
> Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
> ---
> 
> Test results in my setup (kernel 6.8.0-rc4+)
> 	f2fs on zoned nullblk: pass (77s)
> 	f2fs on conventional nvme ssd: pass (13s)
> 	btrfs on zoned nullblk: fails (-ENOSPC)
> 	btrfs on conventional nvme ssd: fails (-ENOSPC)
> 	xfs on conventional nvme ssd: pass (8s)
> 
> Johannes(cc) is working on the btrfs ENOSPC issue.
> 	
>   tests/generic/744     | 124 ++++++++++++++++++++++++++++++++++++++++++
>   tests/generic/744.out |   6 ++
>   2 files changed, 130 insertions(+)
>   create mode 100755 tests/generic/744
>   create mode 100644 tests/generic/744.out
> 
> diff --git a/tests/generic/744 b/tests/generic/744
> new file mode 100755
> index 000000000000..2c7ab76bf8b1
> --- /dev/null
> +++ b/tests/generic/744
> @@ -0,0 +1,124 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (c) 2024 Western Digital Corporation.  All Rights Reserved.
> +#
> +# FS QA Test No. 744
> +#
> +# Inspired by btrfs/273 and generic/015
> +#
> +# This test stresses garbage collection in file systems
> +# by first filling up a scratch mount to a specific usage point with
> +# files of random size, then doing overwrites in parallel with
> +# deletes to fragment the backing zones, forcing reclaim.
> +
> +. ./common/preamble
> +_begin_fstest auto
> +
> +# real QA test starts here
> +
> +_require_scratch
> +
> +# This test requires specific data space usage, skip if we have compression
> +# enabled.
> +_require_no_compress
> +
> +M=$((1024 * 1024))
> +min_fsz=$((1 * ${M}))
> +max_fsz=$((256 * ${M}))
> +bs=${M}
> +fill_percent=95
> +overwrite_percentage=20
> +seq=0
> +
> +_create_file() {
> +	local file_name=${SCRATCH_MNT}/data_$1
> +	local file_sz=$2
> +	local dd_extra=$3
> +
> +	POSIXLY_CORRECT=yes dd if=/dev/zero of=${file_name} \
> +		bs=${bs} count=$(( $file_sz / ${bs} )) \
> +		status=none $dd_extra  2>&1
> +
> +	status=$?
> +	if [ $status -ne 0 ]; then
> +		echo "Failed writing $file_name" >>$seqres.full
> +		exit
> +	fi
> +}
> +
> +_total_M() {
> +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
> +	local bs=$(stat -f -c '%S' ${SCRATCH_MNT})
> +	echo $(( ${total} * ${bs} / ${M}))
> +}
> +
> +_used_percent() {
> +	local available=$(stat -f -c '%a' ${SCRATCH_MNT})
> +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
> +	echo $((100 - (100 * ${available}) / ${total} ))
> +}
> +
> +
> +_delete_random_file() {
> +	local to_delete=$(find ${SCRATCH_MNT} -type f | shuf | head -1)
> +	rm $to_delete
> +	sync ${SCRATCH_MNT}
> +}
> +
> +_get_random_fsz() {
> +	local r=$RANDOM
> +	echo $(( ${min_fsz} + (${max_fsz} - ${min_fsz}) * (${r} % 100) / 100 ))
> +}
> +
> +_direct_fillup () {
> +	while [ $(_used_percent) -lt $fill_percent ]; do
> +		local fsz=$(_get_random_fsz)
> +
> +		_create_file $seq $fsz "oflag=direct conv=fsync"
> +		seq=$((${seq} + 1))
> +	done
> +}
> +
> +_mixed_write_delete() {
> +	local dd_extra=$1
> +	local total_M=$(_total_M)
> +	local to_write_M=$(( ${overwrite_percentage} * ${total_M} / 100 ))
> +	local written_M=0
> +
> +	while [ $written_M -lt $to_write_M ]; do
> +		if [ $(_used_percent) -lt $fill_percent ]; then
> +			local fsz=$(_get_random_fsz)
> +
> +			_create_file $seq $fsz "$dd_extra"
> +			written_M=$((${written_M} + ${fsz}/${M}))
> +			seq=$((${seq} + 1))
> +		else
> +			_delete_random_file
> +		fi
> +	done
> +}
> +
> +seed=$RANDOM
> +RANDOM=$seed
> +echo "Running test with seed=$seed" >>$seqres.full
> +
> +_scratch_mkfs_sized $((8 * 1024 * 1024 * 1024)) >>$seqres.full
> +_scratch_mount
> +
> +echo "Starting fillup using direct IO"
> +_direct_fillup
> +
> +echo "Starting mixed write/delete test using direct IO"
> +_mixed_write_delete "oflag=direct"
> +
> +echo "Starting mixed write/delete test using buffered IO"
> +_mixed_write_delete ""
> +
> +echo "Syncing"
> +sync ${SCRATCH_MNT}/*
> +
> +echo "Done, all good"
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/generic/744.out b/tests/generic/744.out
> new file mode 100644
> index 000000000000..b40c2f43108e
> --- /dev/null
> +++ b/tests/generic/744.out
> @@ -0,0 +1,6 @@
> +QA output created by 744
> +Starting fillup using direct IO
> +Starting mixed write/delete test using direct IO
> +Starting mixed write/delete test using buffered IO
> +Syncing
> +Done, all good
Darrick J. Wong April 16, 2024, 6:54 p.m. UTC | #2
On Tue, Apr 16, 2024 at 09:07:43AM +0000, Hans Holmberg wrote:
> +Zorro (doh!)
> 
> On 2024-04-15 13:23, Hans Holmberg wrote:
> > This test stresses garbage collection for file systems by first filling
> > up a scratch mount to a specific usage point with files of random size,
> > then doing overwrites in parallel with deletes to fragment the backing
> > storage, forcing reclaim.
> > 
> > Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
> > ---
> > 
> > Test results in my setup (kernel 6.8.0-rc4+)
> > 	f2fs on zoned nullblk: pass (77s)
> > 	f2fs on conventional nvme ssd: pass (13s)
> > 	btrfs on zoned nullblk: fails (-ENOSPC)
> > 	btrfs on conventional nvme ssd: fails (-ENOSPC)
> > 	xfs on conventional nvme ssd: pass (8s)
> > 
> > Johannes(cc) is working on the btrfs ENOSPC issue.
> > 	
> >   tests/generic/744     | 124 ++++++++++++++++++++++++++++++++++++++++++
> >   tests/generic/744.out |   6 ++
> >   2 files changed, 130 insertions(+)
> >   create mode 100755 tests/generic/744
> >   create mode 100644 tests/generic/744.out
> > 
> > diff --git a/tests/generic/744 b/tests/generic/744
> > new file mode 100755
> > index 000000000000..2c7ab76bf8b1
> > --- /dev/null
> > +++ b/tests/generic/744
> > @@ -0,0 +1,124 @@
> > +#! /bin/bash
> > +# SPDX-License-Identifier: GPL-2.0
> > +# Copyright (c) 2024 Western Digital Corporation.  All Rights Reserved.
> > +#
> > +# FS QA Test No. 744
> > +#
> > +# Inspired by btrfs/273 and generic/015
> > +#
> > +# This test stresses garbage collection in file systems
> > +# by first filling up a scratch mount to a specific usage point with
> > +# files of random size, then doing overwrites in parallel with
> > +# deletes to fragment the backing zones, forcing reclaim.
> > +
> > +. ./common/preamble
> > +_begin_fstest auto
> > +
> > +# real QA test starts here
> > +
> > +_require_scratch
> > +
> > +# This test requires specific data space usage, skip if we have compression
> > +# enabled.
> > +_require_no_compress
> > +
> > +M=$((1024 * 1024))
> > +min_fsz=$((1 * ${M}))
> > +max_fsz=$((256 * ${M}))
> > +bs=${M}
> > +fill_percent=95
> > +overwrite_percentage=20
> > +seq=0
> > +
> > +_create_file() {
> > +	local file_name=${SCRATCH_MNT}/data_$1
> > +	local file_sz=$2
> > +	local dd_extra=$3
> > +
> > +	POSIXLY_CORRECT=yes dd if=/dev/zero of=${file_name} \
> > +		bs=${bs} count=$(( $file_sz / ${bs} )) \
> > +		status=none $dd_extra  2>&1
> > +
> > +	status=$?
> > +	if [ $status -ne 0 ]; then
> > +		echo "Failed writing $file_name" >>$seqres.full
> > +		exit
> > +	fi
> > +}

I wonder, is there a particular reason for doing all these file
operations with shell code instead of using fsstress to create and
delete files to fill the fs and stress all the zone-gc code?  This test
reminds me a lot of generic/476 but with more fork()ing.

--D

> > +
> > +_total_M() {
> > +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
> > +	local bs=$(stat -f -c '%S' ${SCRATCH_MNT})
> > +	echo $(( ${total} * ${bs} / ${M}))
> > +}
> > +
> > +_used_percent() {
> > +	local available=$(stat -f -c '%a' ${SCRATCH_MNT})
> > +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
> > +	echo $((100 - (100 * ${available}) / ${total} ))
> > +}
> > +
> > +
> > +_delete_random_file() {
> > +	local to_delete=$(find ${SCRATCH_MNT} -type f | shuf | head -1)
> > +	rm $to_delete
> > +	sync ${SCRATCH_MNT}
> > +}
> > +
> > +_get_random_fsz() {
> > +	local r=$RANDOM
> > +	echo $(( ${min_fsz} + (${max_fsz} - ${min_fsz}) * (${r} % 100) / 100 ))
> > +}
> > +
> > +_direct_fillup () {
> > +	while [ $(_used_percent) -lt $fill_percent ]; do
> > +		local fsz=$(_get_random_fsz)
> > +
> > +		_create_file $seq $fsz "oflag=direct conv=fsync"
> > +		seq=$((${seq} + 1))
> > +	done
> > +}
> > +
> > +_mixed_write_delete() {
> > +	local dd_extra=$1
> > +	local total_M=$(_total_M)
> > +	local to_write_M=$(( ${overwrite_percentage} * ${total_M} / 100 ))
> > +	local written_M=0
> > +
> > +	while [ $written_M -lt $to_write_M ]; do
> > +		if [ $(_used_percent) -lt $fill_percent ]; then
> > +			local fsz=$(_get_random_fsz)
> > +
> > +			_create_file $seq $fsz "$dd_extra"
> > +			written_M=$((${written_M} + ${fsz}/${M}))
> > +			seq=$((${seq} + 1))
> > +		else
> > +			_delete_random_file
> > +		fi
> > +	done
> > +}
> > +
> > +seed=$RANDOM
> > +RANDOM=$seed
> > +echo "Running test with seed=$seed" >>$seqres.full
> > +
> > +_scratch_mkfs_sized $((8 * 1024 * 1024 * 1024)) >>$seqres.full
> > +_scratch_mount
> > +
> > +echo "Starting fillup using direct IO"
> > +_direct_fillup
> > +
> > +echo "Starting mixed write/delete test using direct IO"
> > +_mixed_write_delete "oflag=direct"
> > +
> > +echo "Starting mixed write/delete test using buffered IO"
> > +_mixed_write_delete ""
> > +
> > +echo "Syncing"
> > +sync ${SCRATCH_MNT}/*
> > +
> > +echo "Done, all good"
> > +
> > +# success, all done
> > +status=0
> > +exit
> > diff --git a/tests/generic/744.out b/tests/generic/744.out
> > new file mode 100644
> > index 000000000000..b40c2f43108e
> > --- /dev/null
> > +++ b/tests/generic/744.out
> > @@ -0,0 +1,6 @@
> > +QA output created by 744
> > +Starting fillup using direct IO
> > +Starting mixed write/delete test using direct IO
> > +Starting mixed write/delete test using buffered IO
> > +Syncing
> > +Done, all good
>
Zorro Lang April 17, 2024, 12:43 p.m. UTC | #3
On Tue, Apr 16, 2024 at 11:54:37AM -0700, Darrick J. Wong wrote:
> On Tue, Apr 16, 2024 at 09:07:43AM +0000, Hans Holmberg wrote:
> > +Zorro (doh!)
> > 
> > On 2024-04-15 13:23, Hans Holmberg wrote:
> > > This test stresses garbage collection for file systems by first filling
> > > up a scratch mount to a specific usage point with files of random size,
> > > then doing overwrites in parallel with deletes to fragment the backing
> > > storage, forcing reclaim.
> > > 
> > > Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
> > > ---
> > > 
> > > Test results in my setup (kernel 6.8.0-rc4+)
> > > 	f2fs on zoned nullblk: pass (77s)
> > > 	f2fs on conventional nvme ssd: pass (13s)
> > > 	btrfs on zoned nullblk: fails (-ENOSPC)
> > > 	btrfs on conventional nvme ssd: fails (-ENOSPC)
> > > 	xfs on conventional nvme ssd: pass (8s)
> > > 
> > > Johannes(cc) is working on the btrfs ENOSPC issue.
> > > 	
> > >   tests/generic/744     | 124 ++++++++++++++++++++++++++++++++++++++++++
> > >   tests/generic/744.out |   6 ++
> > >   2 files changed, 130 insertions(+)
> > >   create mode 100755 tests/generic/744
> > >   create mode 100644 tests/generic/744.out
> > > 
> > > diff --git a/tests/generic/744 b/tests/generic/744
> > > new file mode 100755
> > > index 000000000000..2c7ab76bf8b1
> > > --- /dev/null
> > > +++ b/tests/generic/744
> > > @@ -0,0 +1,124 @@
> > > +#! /bin/bash
> > > +# SPDX-License-Identifier: GPL-2.0
> > > +# Copyright (c) 2024 Western Digital Corporation.  All Rights Reserved.
> > > +#
> > > +# FS QA Test No. 744
> > > +#
> > > +# Inspired by btrfs/273 and generic/015
> > > +#
> > > +# This test stresses garbage collection in file systems
> > > +# by first filling up a scratch mount to a specific usage point with
> > > +# files of random size, then doing overwrites in parallel with
> > > +# deletes to fragment the backing zones, forcing reclaim.
> > > +
> > > +. ./common/preamble
> > > +_begin_fstest auto
> > > +
> > > +# real QA test starts here
> > > +
> > > +_require_scratch
> > > +
> > > +# This test requires specific data space usage, skip if we have compression
> > > +# enabled.
> > > +_require_no_compress
> > > +
> > > +M=$((1024 * 1024))
> > > +min_fsz=$((1 * ${M}))
> > > +max_fsz=$((256 * ${M}))
> > > +bs=${M}
> > > +fill_percent=95
> > > +overwrite_percentage=20
> > > +seq=0
> > > +
> > > +_create_file() {
> > > +	local file_name=${SCRATCH_MNT}/data_$1
> > > +	local file_sz=$2
> > > +	local dd_extra=$3
> > > +
> > > +	POSIXLY_CORRECT=yes dd if=/dev/zero of=${file_name} \
> > > +		bs=${bs} count=$(( $file_sz / ${bs} )) \
> > > +		status=none $dd_extra  2>&1
> > > +
> > > +	status=$?
> > > +	if [ $status -ne 0 ]; then
> > > +		echo "Failed writing $file_name" >>$seqres.full
> > > +		exit
> > > +	fi
> > > +}
> 
> I wonder, is there a particular reason for doing all these file
> operations with shell code instead of using fsstress to create and
> delete files to fill the fs and stress all the zone-gc code?  This test
> reminds me a lot of generic/476 but with more fork()ing.

/me has the same confusion. Can this test cover more things than using
fsstress (to do reclaim test) ? Or does it uncover some known bugs which
other cases can't?

Thanks,
Zorro

> 
> --D
> 
> > > +
> > > +_total_M() {
> > > +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
> > > +	local bs=$(stat -f -c '%S' ${SCRATCH_MNT})
> > > +	echo $(( ${total} * ${bs} / ${M}))
> > > +}
> > > +
> > > +_used_percent() {
> > > +	local available=$(stat -f -c '%a' ${SCRATCH_MNT})
> > > +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
> > > +	echo $((100 - (100 * ${available}) / ${total} ))
> > > +}
> > > +
> > > +
> > > +_delete_random_file() {
> > > +	local to_delete=$(find ${SCRATCH_MNT} -type f | shuf | head -1)
> > > +	rm $to_delete
> > > +	sync ${SCRATCH_MNT}
> > > +}
> > > +
> > > +_get_random_fsz() {
> > > +	local r=$RANDOM
> > > +	echo $(( ${min_fsz} + (${max_fsz} - ${min_fsz}) * (${r} % 100) / 100 ))
> > > +}
> > > +
> > > +_direct_fillup () {
> > > +	while [ $(_used_percent) -lt $fill_percent ]; do
> > > +		local fsz=$(_get_random_fsz)
> > > +
> > > +		_create_file $seq $fsz "oflag=direct conv=fsync"
> > > +		seq=$((${seq} + 1))
> > > +	done
> > > +}
> > > +
> > > +_mixed_write_delete() {
> > > +	local dd_extra=$1
> > > +	local total_M=$(_total_M)
> > > +	local to_write_M=$(( ${overwrite_percentage} * ${total_M} / 100 ))
> > > +	local written_M=0
> > > +
> > > +	while [ $written_M -lt $to_write_M ]; do
> > > +		if [ $(_used_percent) -lt $fill_percent ]; then
> > > +			local fsz=$(_get_random_fsz)
> > > +
> > > +			_create_file $seq $fsz "$dd_extra"
> > > +			written_M=$((${written_M} + ${fsz}/${M}))
> > > +			seq=$((${seq} + 1))
> > > +		else
> > > +			_delete_random_file
> > > +		fi
> > > +	done
> > > +}
> > > +
> > > +seed=$RANDOM
> > > +RANDOM=$seed
> > > +echo "Running test with seed=$seed" >>$seqres.full
> > > +
> > > +_scratch_mkfs_sized $((8 * 1024 * 1024 * 1024)) >>$seqres.full
> > > +_scratch_mount
> > > +
> > > +echo "Starting fillup using direct IO"
> > > +_direct_fillup
> > > +
> > > +echo "Starting mixed write/delete test using direct IO"
> > > +_mixed_write_delete "oflag=direct"
> > > +
> > > +echo "Starting mixed write/delete test using buffered IO"
> > > +_mixed_write_delete ""
> > > +
> > > +echo "Syncing"
> > > +sync ${SCRATCH_MNT}/*
> > > +
> > > +echo "Done, all good"
> > > +
> > > +# success, all done
> > > +status=0
> > > +exit
> > > diff --git a/tests/generic/744.out b/tests/generic/744.out
> > > new file mode 100644
> > > index 000000000000..b40c2f43108e
> > > --- /dev/null
> > > +++ b/tests/generic/744.out
> > > @@ -0,0 +1,6 @@
> > > +QA output created by 744
> > > +Starting fillup using direct IO
> > > +Starting mixed write/delete test using direct IO
> > > +Starting mixed write/delete test using buffered IO
> > > +Syncing
> > > +Done, all good
> > 
>
Hans Holmberg April 17, 2024, 1:21 p.m. UTC | #4
On 2024-04-17 14:43, Zorro Lang wrote:
> On Tue, Apr 16, 2024 at 11:54:37AM -0700, Darrick J. Wong wrote:
>> On Tue, Apr 16, 2024 at 09:07:43AM +0000, Hans Holmberg wrote:
>>> +Zorro (doh!)
>>>
>>> On 2024-04-15 13:23, Hans Holmberg wrote:
>>>> This test stresses garbage collection for file systems by first filling
>>>> up a scratch mount to a specific usage point with files of random size,
>>>> then doing overwrites in parallel with deletes to fragment the backing
>>>> storage, forcing reclaim.
>>>>
>>>> Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
>>>> ---
>>>>
>>>> Test results in my setup (kernel 6.8.0-rc4+)
>>>> 	f2fs on zoned nullblk: pass (77s)
>>>> 	f2fs on conventional nvme ssd: pass (13s)
>>>> 	btrfs on zoned nullblk: fails (-ENOSPC)
>>>> 	btrfs on conventional nvme ssd: fails (-ENOSPC)
>>>> 	xfs on conventional nvme ssd: pass (8s)
>>>>
>>>> Johannes(cc) is working on the btrfs ENOSPC issue.
>>>> 	
>>>>    tests/generic/744     | 124 ++++++++++++++++++++++++++++++++++++++++++
>>>>    tests/generic/744.out |   6 ++
>>>>    2 files changed, 130 insertions(+)
>>>>    create mode 100755 tests/generic/744
>>>>    create mode 100644 tests/generic/744.out
>>>>
>>>> diff --git a/tests/generic/744 b/tests/generic/744
>>>> new file mode 100755
>>>> index 000000000000..2c7ab76bf8b1
>>>> --- /dev/null
>>>> +++ b/tests/generic/744
>>>> @@ -0,0 +1,124 @@
>>>> +#! /bin/bash
>>>> +# SPDX-License-Identifier: GPL-2.0
>>>> +# Copyright (c) 2024 Western Digital Corporation.  All Rights Reserved.
>>>> +#
>>>> +# FS QA Test No. 744
>>>> +#
>>>> +# Inspired by btrfs/273 and generic/015
>>>> +#
>>>> +# This test stresses garbage collection in file systems
>>>> +# by first filling up a scratch mount to a specific usage point with
>>>> +# files of random size, then doing overwrites in parallel with
>>>> +# deletes to fragment the backing zones, forcing reclaim.
>>>> +
>>>> +. ./common/preamble
>>>> +_begin_fstest auto
>>>> +
>>>> +# real QA test starts here
>>>> +
>>>> +_require_scratch
>>>> +
>>>> +# This test requires specific data space usage, skip if we have compression
>>>> +# enabled.
>>>> +_require_no_compress
>>>> +
>>>> +M=$((1024 * 1024))
>>>> +min_fsz=$((1 * ${M}))
>>>> +max_fsz=$((256 * ${M}))
>>>> +bs=${M}
>>>> +fill_percent=95
>>>> +overwrite_percentage=20
>>>> +seq=0
>>>> +
>>>> +_create_file() {
>>>> +	local file_name=${SCRATCH_MNT}/data_$1
>>>> +	local file_sz=$2
>>>> +	local dd_extra=$3
>>>> +
>>>> +	POSIXLY_CORRECT=yes dd if=/dev/zero of=${file_name} \
>>>> +		bs=${bs} count=$(( $file_sz / ${bs} )) \
>>>> +		status=none $dd_extra  2>&1
>>>> +
>>>> +	status=$?
>>>> +	if [ $status -ne 0 ]; then
>>>> +		echo "Failed writing $file_name" >>$seqres.full
>>>> +		exit
>>>> +	fi
>>>> +}
>>
>> I wonder, is there a particular reason for doing all these file
>> operations with shell code instead of using fsstress to create and
>> delete files to fill the fs and stress all the zone-gc code?  This test
>> reminds me a lot of generic/476 but with more fork()ing.
> 
> /me has the same confusion. Can this test cover more things than using
> fsstress (to do reclaim test) ? Or does it uncover some known bugs which
> other cases can't?

ah, adding some more background is probably useful:

I've been using this test to stress the crap out the zoned xfs garbage
collection / write throttling implementation for zoned rt subvolumes
support in xfs and it has found a number of issues during implementation
that i did not reproduce by other means.

I think it also has wider applicability as it triggers bugs in btrfs. 
f2fs passes without issues, but probably benefits from a quick smoke gc 
test as well. Discussed this with Bart and Daeho (now in cc) before 
submitting.

Using fsstress would be cool, but as far as I can tell it cannot
be told to operate at a specific file system usage point, which
is a key thing for this test.

Thanks,
Hans

> 
> Thanks,
> Zorro
> 
>>
>> --D
>>
>>>> +
>>>> +_total_M() {
>>>> +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
>>>> +	local bs=$(stat -f -c '%S' ${SCRATCH_MNT})
>>>> +	echo $(( ${total} * ${bs} / ${M}))
>>>> +}
>>>> +
>>>> +_used_percent() {
>>>> +	local available=$(stat -f -c '%a' ${SCRATCH_MNT})
>>>> +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
>>>> +	echo $((100 - (100 * ${available}) / ${total} ))
>>>> +}
>>>> +
>>>> +
>>>> +_delete_random_file() {
>>>> +	local to_delete=$(find ${SCRATCH_MNT} -type f | shuf | head -1)
>>>> +	rm $to_delete
>>>> +	sync ${SCRATCH_MNT}
>>>> +}
>>>> +
>>>> +_get_random_fsz() {
>>>> +	local r=$RANDOM
>>>> +	echo $(( ${min_fsz} + (${max_fsz} - ${min_fsz}) * (${r} % 100) / 100 ))
>>>> +}
>>>> +
>>>> +_direct_fillup () {
>>>> +	while [ $(_used_percent) -lt $fill_percent ]; do
>>>> +		local fsz=$(_get_random_fsz)
>>>> +
>>>> +		_create_file $seq $fsz "oflag=direct conv=fsync"
>>>> +		seq=$((${seq} + 1))
>>>> +	done
>>>> +}
>>>> +
>>>> +_mixed_write_delete() {
>>>> +	local dd_extra=$1
>>>> +	local total_M=$(_total_M)
>>>> +	local to_write_M=$(( ${overwrite_percentage} * ${total_M} / 100 ))
>>>> +	local written_M=0
>>>> +
>>>> +	while [ $written_M -lt $to_write_M ]; do
>>>> +		if [ $(_used_percent) -lt $fill_percent ]; then
>>>> +			local fsz=$(_get_random_fsz)
>>>> +
>>>> +			_create_file $seq $fsz "$dd_extra"
>>>> +			written_M=$((${written_M} + ${fsz}/${M}))
>>>> +			seq=$((${seq} + 1))
>>>> +		else
>>>> +			_delete_random_file
>>>> +		fi
>>>> +	done
>>>> +}
>>>> +
>>>> +seed=$RANDOM
>>>> +RANDOM=$seed
>>>> +echo "Running test with seed=$seed" >>$seqres.full
>>>> +
>>>> +_scratch_mkfs_sized $((8 * 1024 * 1024 * 1024)) >>$seqres.full
>>>> +_scratch_mount
>>>> +
>>>> +echo "Starting fillup using direct IO"
>>>> +_direct_fillup
>>>> +
>>>> +echo "Starting mixed write/delete test using direct IO"
>>>> +_mixed_write_delete "oflag=direct"
>>>> +
>>>> +echo "Starting mixed write/delete test using buffered IO"
>>>> +_mixed_write_delete ""
>>>> +
>>>> +echo "Syncing"
>>>> +sync ${SCRATCH_MNT}/*
>>>> +
>>>> +echo "Done, all good"
>>>> +
>>>> +# success, all done
>>>> +status=0
>>>> +exit
>>>> diff --git a/tests/generic/744.out b/tests/generic/744.out
>>>> new file mode 100644
>>>> index 000000000000..b40c2f43108e
>>>> --- /dev/null
>>>> +++ b/tests/generic/744.out
>>>> @@ -0,0 +1,6 @@
>>>> +QA output created by 744
>>>> +Starting fillup using direct IO
>>>> +Starting mixed write/delete test using direct IO
>>>> +Starting mixed write/delete test using buffered IO
>>>> +Syncing
>>>> +Done, all good
>>>
>>
> 
>
Zorro Lang April 17, 2024, 2:06 p.m. UTC | #5
On Wed, Apr 17, 2024 at 01:21:39PM +0000, Hans Holmberg wrote:
> On 2024-04-17 14:43, Zorro Lang wrote:
> > On Tue, Apr 16, 2024 at 11:54:37AM -0700, Darrick J. Wong wrote:
> >> On Tue, Apr 16, 2024 at 09:07:43AM +0000, Hans Holmberg wrote:
> >>> +Zorro (doh!)
> >>>
> >>> On 2024-04-15 13:23, Hans Holmberg wrote:
> >>>> This test stresses garbage collection for file systems by first filling
> >>>> up a scratch mount to a specific usage point with files of random size,
> >>>> then doing overwrites in parallel with deletes to fragment the backing
> >>>> storage, forcing reclaim.
> >>>>
> >>>> Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
> >>>> ---
> >>>>
> >>>> Test results in my setup (kernel 6.8.0-rc4+)
> >>>> 	f2fs on zoned nullblk: pass (77s)
> >>>> 	f2fs on conventional nvme ssd: pass (13s)
> >>>> 	btrfs on zoned nullblk: fails (-ENOSPC)
> >>>> 	btrfs on conventional nvme ssd: fails (-ENOSPC)
> >>>> 	xfs on conventional nvme ssd: pass (8s)
> >>>>
> >>>> Johannes(cc) is working on the btrfs ENOSPC issue.
> >>>> 	
> >>>>    tests/generic/744     | 124 ++++++++++++++++++++++++++++++++++++++++++
> >>>>    tests/generic/744.out |   6 ++
> >>>>    2 files changed, 130 insertions(+)
> >>>>    create mode 100755 tests/generic/744
> >>>>    create mode 100644 tests/generic/744.out
> >>>>
> >>>> diff --git a/tests/generic/744 b/tests/generic/744
> >>>> new file mode 100755
> >>>> index 000000000000..2c7ab76bf8b1
> >>>> --- /dev/null
> >>>> +++ b/tests/generic/744
> >>>> @@ -0,0 +1,124 @@
> >>>> +#! /bin/bash
> >>>> +# SPDX-License-Identifier: GPL-2.0
> >>>> +# Copyright (c) 2024 Western Digital Corporation.  All Rights Reserved.
> >>>> +#
> >>>> +# FS QA Test No. 744
> >>>> +#
> >>>> +# Inspired by btrfs/273 and generic/015
> >>>> +#
> >>>> +# This test stresses garbage collection in file systems
> >>>> +# by first filling up a scratch mount to a specific usage point with
> >>>> +# files of random size, then doing overwrites in parallel with
> >>>> +# deletes to fragment the backing zones, forcing reclaim.
> >>>> +
> >>>> +. ./common/preamble
> >>>> +_begin_fstest auto
> >>>> +
> >>>> +# real QA test starts here
> >>>> +
> >>>> +_require_scratch
> >>>> +
> >>>> +# This test requires specific data space usage, skip if we have compression
> >>>> +# enabled.
> >>>> +_require_no_compress
> >>>> +
> >>>> +M=$((1024 * 1024))
> >>>> +min_fsz=$((1 * ${M}))
> >>>> +max_fsz=$((256 * ${M}))
> >>>> +bs=${M}
> >>>> +fill_percent=95
> >>>> +overwrite_percentage=20
> >>>> +seq=0
> >>>> +
> >>>> +_create_file() {
> >>>> +	local file_name=${SCRATCH_MNT}/data_$1
> >>>> +	local file_sz=$2
> >>>> +	local dd_extra=$3
> >>>> +
> >>>> +	POSIXLY_CORRECT=yes dd if=/dev/zero of=${file_name} \
> >>>> +		bs=${bs} count=$(( $file_sz / ${bs} )) \
> >>>> +		status=none $dd_extra  2>&1
> >>>> +
> >>>> +	status=$?
> >>>> +	if [ $status -ne 0 ]; then
> >>>> +		echo "Failed writing $file_name" >>$seqres.full
> >>>> +		exit
> >>>> +	fi
> >>>> +}
> >>
> >> I wonder, is there a particular reason for doing all these file
> >> operations with shell code instead of using fsstress to create and
> >> delete files to fill the fs and stress all the zone-gc code?  This test
> >> reminds me a lot of generic/476 but with more fork()ing.
> > 
> > /me has the same confusion. Can this test cover more things than using
> > fsstress (to do reclaim test) ? Or does it uncover some known bugs which
> > other cases can't?
> 
> ah, adding some more background is probably useful:
> 
> I've been using this test to stress the crap out the zoned xfs garbage
> collection / write throttling implementation for zoned rt subvolumes
> support in xfs and it has found a number of issues during implementation
> that i did not reproduce by other means.
> 
> I think it also has wider applicability as it triggers bugs in btrfs. 
> f2fs passes without issues, but probably benefits from a quick smoke gc 
> test as well. Discussed this with Bart and Daeho (now in cc) before 
> submitting.
> 
> Using fsstress would be cool, but as far as I can tell it cannot
> be told to operate at a specific file system usage point, which
> is a key thing for this test.

As a random test case, if this case can be transformed to use fsstress to cover
same issues, that would be nice.

But if as a regression test case, it has its particular test coverage, and the
issue it covered can't be reproduced by fsstress way, then let's work on this
bash script one.

Any thoughts?

Thanks,
Zorro

> 
> Thanks,
> Hans
> 
> > 
> > Thanks,
> > Zorro
> > 
> >>
> >> --D
> >>
> >>>> +
> >>>> +_total_M() {
> >>>> +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
> >>>> +	local bs=$(stat -f -c '%S' ${SCRATCH_MNT})
> >>>> +	echo $(( ${total} * ${bs} / ${M}))
> >>>> +}
> >>>> +
> >>>> +_used_percent() {
> >>>> +	local available=$(stat -f -c '%a' ${SCRATCH_MNT})
> >>>> +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
> >>>> +	echo $((100 - (100 * ${available}) / ${total} ))
> >>>> +}
> >>>> +
> >>>> +
> >>>> +_delete_random_file() {
> >>>> +	local to_delete=$(find ${SCRATCH_MNT} -type f | shuf | head -1)
> >>>> +	rm $to_delete
> >>>> +	sync ${SCRATCH_MNT}
> >>>> +}
> >>>> +
> >>>> +_get_random_fsz() {
> >>>> +	local r=$RANDOM
> >>>> +	echo $(( ${min_fsz} + (${max_fsz} - ${min_fsz}) * (${r} % 100) / 100 ))
> >>>> +}
> >>>> +
> >>>> +_direct_fillup () {
> >>>> +	while [ $(_used_percent) -lt $fill_percent ]; do
> >>>> +		local fsz=$(_get_random_fsz)
> >>>> +
> >>>> +		_create_file $seq $fsz "oflag=direct conv=fsync"
> >>>> +		seq=$((${seq} + 1))
> >>>> +	done
> >>>> +}
> >>>> +
> >>>> +_mixed_write_delete() {
> >>>> +	local dd_extra=$1
> >>>> +	local total_M=$(_total_M)
> >>>> +	local to_write_M=$(( ${overwrite_percentage} * ${total_M} / 100 ))
> >>>> +	local written_M=0
> >>>> +
> >>>> +	while [ $written_M -lt $to_write_M ]; do
> >>>> +		if [ $(_used_percent) -lt $fill_percent ]; then
> >>>> +			local fsz=$(_get_random_fsz)
> >>>> +
> >>>> +			_create_file $seq $fsz "$dd_extra"
> >>>> +			written_M=$((${written_M} + ${fsz}/${M}))
> >>>> +			seq=$((${seq} + 1))
> >>>> +		else
> >>>> +			_delete_random_file
> >>>> +		fi
> >>>> +	done
> >>>> +}
> >>>> +
> >>>> +seed=$RANDOM
> >>>> +RANDOM=$seed
> >>>> +echo "Running test with seed=$seed" >>$seqres.full
> >>>> +
> >>>> +_scratch_mkfs_sized $((8 * 1024 * 1024 * 1024)) >>$seqres.full
> >>>> +_scratch_mount
> >>>> +
> >>>> +echo "Starting fillup using direct IO"
> >>>> +_direct_fillup
> >>>> +
> >>>> +echo "Starting mixed write/delete test using direct IO"
> >>>> +_mixed_write_delete "oflag=direct"
> >>>> +
> >>>> +echo "Starting mixed write/delete test using buffered IO"
> >>>> +_mixed_write_delete ""
> >>>> +
> >>>> +echo "Syncing"
> >>>> +sync ${SCRATCH_MNT}/*
> >>>> +
> >>>> +echo "Done, all good"
> >>>> +
> >>>> +# success, all done
> >>>> +status=0
> >>>> +exit
> >>>> diff --git a/tests/generic/744.out b/tests/generic/744.out
> >>>> new file mode 100644
> >>>> index 000000000000..b40c2f43108e
> >>>> --- /dev/null
> >>>> +++ b/tests/generic/744.out
> >>>> @@ -0,0 +1,6 @@
> >>>> +QA output created by 744
> >>>> +Starting fillup using direct IO
> >>>> +Starting mixed write/delete test using direct IO
> >>>> +Starting mixed write/delete test using buffered IO
> >>>> +Syncing
> >>>> +Done, all good
> >>>
> >>
> > 
> > 
>
Hans Holmberg April 17, 2024, 2:45 p.m. UTC | #6
On 2024-04-17 16:07, Zorro Lang wrote:
> On Wed, Apr 17, 2024 at 01:21:39PM +0000, Hans Holmberg wrote:
>> On 2024-04-17 14:43, Zorro Lang wrote:
>>> On Tue, Apr 16, 2024 at 11:54:37AM -0700, Darrick J. Wong wrote:
>>>> On Tue, Apr 16, 2024 at 09:07:43AM +0000, Hans Holmberg wrote:
>>>>> +Zorro (doh!)
>>>>>
>>>>> On 2024-04-15 13:23, Hans Holmberg wrote:
>>>>>> This test stresses garbage collection for file systems by first filling
>>>>>> up a scratch mount to a specific usage point with files of random size,
>>>>>> then doing overwrites in parallel with deletes to fragment the backing
>>>>>> storage, forcing reclaim.
>>>>>>
>>>>>> Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
>>>>>> ---
>>>>>>
>>>>>> Test results in my setup (kernel 6.8.0-rc4+)
>>>>>> 	f2fs on zoned nullblk: pass (77s)
>>>>>> 	f2fs on conventional nvme ssd: pass (13s)
>>>>>> 	btrfs on zoned nullblk: fails (-ENOSPC)
>>>>>> 	btrfs on conventional nvme ssd: fails (-ENOSPC)
>>>>>> 	xfs on conventional nvme ssd: pass (8s)
>>>>>>
>>>>>> Johannes(cc) is working on the btrfs ENOSPC issue.
>>>>>> 	
>>>>>>     tests/generic/744     | 124 ++++++++++++++++++++++++++++++++++++++++++
>>>>>>     tests/generic/744.out |   6 ++
>>>>>>     2 files changed, 130 insertions(+)
>>>>>>     create mode 100755 tests/generic/744
>>>>>>     create mode 100644 tests/generic/744.out
>>>>>>
>>>>>> diff --git a/tests/generic/744 b/tests/generic/744
>>>>>> new file mode 100755
>>>>>> index 000000000000..2c7ab76bf8b1
>>>>>> --- /dev/null
>>>>>> +++ b/tests/generic/744
>>>>>> @@ -0,0 +1,124 @@
>>>>>> +#! /bin/bash
>>>>>> +# SPDX-License-Identifier: GPL-2.0
>>>>>> +# Copyright (c) 2024 Western Digital Corporation.  All Rights Reserved.
>>>>>> +#
>>>>>> +# FS QA Test No. 744
>>>>>> +#
>>>>>> +# Inspired by btrfs/273 and generic/015
>>>>>> +#
>>>>>> +# This test stresses garbage collection in file systems
>>>>>> +# by first filling up a scratch mount to a specific usage point with
>>>>>> +# files of random size, then doing overwrites in parallel with
>>>>>> +# deletes to fragment the backing zones, forcing reclaim.
>>>>>> +
>>>>>> +. ./common/preamble
>>>>>> +_begin_fstest auto
>>>>>> +
>>>>>> +# real QA test starts here
>>>>>> +
>>>>>> +_require_scratch
>>>>>> +
>>>>>> +# This test requires specific data space usage, skip if we have compression
>>>>>> +# enabled.
>>>>>> +_require_no_compress
>>>>>> +
>>>>>> +M=$((1024 * 1024))
>>>>>> +min_fsz=$((1 * ${M}))
>>>>>> +max_fsz=$((256 * ${M}))
>>>>>> +bs=${M}
>>>>>> +fill_percent=95
>>>>>> +overwrite_percentage=20
>>>>>> +seq=0
>>>>>> +
>>>>>> +_create_file() {
>>>>>> +	local file_name=${SCRATCH_MNT}/data_$1
>>>>>> +	local file_sz=$2
>>>>>> +	local dd_extra=$3
>>>>>> +
>>>>>> +	POSIXLY_CORRECT=yes dd if=/dev/zero of=${file_name} \
>>>>>> +		bs=${bs} count=$(( $file_sz / ${bs} )) \
>>>>>> +		status=none $dd_extra  2>&1
>>>>>> +
>>>>>> +	status=$?
>>>>>> +	if [ $status -ne 0 ]; then
>>>>>> +		echo "Failed writing $file_name" >>$seqres.full
>>>>>> +		exit
>>>>>> +	fi
>>>>>> +}
>>>>
>>>> I wonder, is there a particular reason for doing all these file
>>>> operations with shell code instead of using fsstress to create and
>>>> delete files to fill the fs and stress all the zone-gc code?  This test
>>>> reminds me a lot of generic/476 but with more fork()ing.
>>>
>>> /me has the same confusion. Can this test cover more things than using
>>> fsstress (to do reclaim test) ? Or does it uncover some known bugs which
>>> other cases can't?
>>
>> ah, adding some more background is probably useful:
>>
>> I've been using this test to stress the crap out the zoned xfs garbage
>> collection / write throttling implementation for zoned rt subvolumes
>> support in xfs and it has found a number of issues during implementation
>> that i did not reproduce by other means.
>>
>> I think it also has wider applicability as it triggers bugs in btrfs.
>> f2fs passes without issues, but probably benefits from a quick smoke gc
>> test as well. Discussed this with Bart and Daeho (now in cc) before
>> submitting.
>>
>> Using fsstress would be cool, but as far as I can tell it cannot
>> be told to operate at a specific file system usage point, which
>> is a key thing for this test.
> 
> As a random test case, if this case can be transformed to use fsstress to cover
> same issues, that would be nice.
> 
> But if as a regression test case, it has its particular test coverage, and the
> issue it covered can't be reproduced by fsstress way, then let's work on this
> bash script one.
> 
> Any thoughts?

Yeah, I think bash is preferable for this particular test case.
Bash also makes it easy to hack for people's private uses.

I use longer versions of this test (increasing overwrite_percentage)
for weekly testing.

If we need fsstress for reproducing any future gc bug we can add
what's missing to it then.

Does that make sense?

Thanks,
Hans

> 
> Thanks,
> Zorro
> 
>>
>> Thanks,
>> Hans
>>
>>>
>>> Thanks,
>>> Zorro
>>>
>>>>
>>>> --D
>>>>
>>>>>> +
>>>>>> +_total_M() {
>>>>>> +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
>>>>>> +	local bs=$(stat -f -c '%S' ${SCRATCH_MNT})
>>>>>> +	echo $(( ${total} * ${bs} / ${M}))
>>>>>> +}
>>>>>> +
>>>>>> +_used_percent() {
>>>>>> +	local available=$(stat -f -c '%a' ${SCRATCH_MNT})
>>>>>> +	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
>>>>>> +	echo $((100 - (100 * ${available}) / ${total} ))
>>>>>> +}
>>>>>> +
>>>>>> +
>>>>>> +_delete_random_file() {
>>>>>> +	local to_delete=$(find ${SCRATCH_MNT} -type f | shuf | head -1)
>>>>>> +	rm $to_delete
>>>>>> +	sync ${SCRATCH_MNT}
>>>>>> +}
>>>>>> +
>>>>>> +_get_random_fsz() {
>>>>>> +	local r=$RANDOM
>>>>>> +	echo $(( ${min_fsz} + (${max_fsz} - ${min_fsz}) * (${r} % 100) / 100 ))
>>>>>> +}
>>>>>> +
>>>>>> +_direct_fillup () {
>>>>>> +	while [ $(_used_percent) -lt $fill_percent ]; do
>>>>>> +		local fsz=$(_get_random_fsz)
>>>>>> +
>>>>>> +		_create_file $seq $fsz "oflag=direct conv=fsync"
>>>>>> +		seq=$((${seq} + 1))
>>>>>> +	done
>>>>>> +}
>>>>>> +
>>>>>> +_mixed_write_delete() {
>>>>>> +	local dd_extra=$1
>>>>>> +	local total_M=$(_total_M)
>>>>>> +	local to_write_M=$(( ${overwrite_percentage} * ${total_M} / 100 ))
>>>>>> +	local written_M=0
>>>>>> +
>>>>>> +	while [ $written_M -lt $to_write_M ]; do
>>>>>> +		if [ $(_used_percent) -lt $fill_percent ]; then
>>>>>> +			local fsz=$(_get_random_fsz)
>>>>>> +
>>>>>> +			_create_file $seq $fsz "$dd_extra"
>>>>>> +			written_M=$((${written_M} + ${fsz}/${M}))
>>>>>> +			seq=$((${seq} + 1))
>>>>>> +		else
>>>>>> +			_delete_random_file
>>>>>> +		fi
>>>>>> +	done
>>>>>> +}
>>>>>> +
>>>>>> +seed=$RANDOM
>>>>>> +RANDOM=$seed
>>>>>> +echo "Running test with seed=$seed" >>$seqres.full
>>>>>> +
>>>>>> +_scratch_mkfs_sized $((8 * 1024 * 1024 * 1024)) >>$seqres.full
>>>>>> +_scratch_mount
>>>>>> +
>>>>>> +echo "Starting fillup using direct IO"
>>>>>> +_direct_fillup
>>>>>> +
>>>>>> +echo "Starting mixed write/delete test using direct IO"
>>>>>> +_mixed_write_delete "oflag=direct"
>>>>>> +
>>>>>> +echo "Starting mixed write/delete test using buffered IO"
>>>>>> +_mixed_write_delete ""
>>>>>> +
>>>>>> +echo "Syncing"
>>>>>> +sync ${SCRATCH_MNT}/*
>>>>>> +
>>>>>> +echo "Done, all good"
>>>>>> +
>>>>>> +# success, all done
>>>>>> +status=0
>>>>>> +exit
>>>>>> diff --git a/tests/generic/744.out b/tests/generic/744.out
>>>>>> new file mode 100644
>>>>>> index 000000000000..b40c2f43108e
>>>>>> --- /dev/null
>>>>>> +++ b/tests/generic/744.out
>>>>>> @@ -0,0 +1,6 @@
>>>>>> +QA output created by 744
>>>>>> +Starting fillup using direct IO
>>>>>> +Starting mixed write/delete test using direct IO
>>>>>> +Starting mixed write/delete test using buffered IO
>>>>>> +Syncing
>>>>>> +Done, all good
>>>>>
>>>>
>>>
>>>
>>
>
Hans Holmberg May 8, 2024, 7:08 a.m. UTC | #7
On 2024-04-17 16:50, Hans Holmberg wrote:
> On 2024-04-17 16:07, Zorro Lang wrote:
>> On Wed, Apr 17, 2024 at 01:21:39PM +0000, Hans Holmberg wrote:
>>> On 2024-04-17 14:43, Zorro Lang wrote:
>>>> On Tue, Apr 16, 2024 at 11:54:37AM -0700, Darrick J. Wong wrote:
>>>>> On Tue, Apr 16, 2024 at 09:07:43AM +0000, Hans Holmberg wrote:
>>>>>> +Zorro (doh!)
>>>>>>
>>>>>> On 2024-04-15 13:23, Hans Holmberg wrote:
>>>>>>> This test stresses garbage collection for file systems by first filling
>>>>>>> up a scratch mount to a specific usage point with files of random size,
>>>>>>> then doing overwrites in parallel with deletes to fragment the backing
>>>>>>> storage, forcing reclaim.
>>>>>>>
>>>>>>> Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
>>>>>>> ---
>>>>>>>
>>>>>>> Test results in my setup (kernel 6.8.0-rc4+)
>>>>>>> 	f2fs on zoned nullblk: pass (77s)
>>>>>>> 	f2fs on conventional nvme ssd: pass (13s)
>>>>>>> 	btrfs on zoned nublk: fails (-ENOSPC)
>>>>>>> 	btrfs on conventional nvme ssd: fails (-ENOSPC)
>>>>>>> 	xfs on conventional nvme ssd: pass (8s)
>>>>>>>
>>>>>>> Johannes(cc) is working on the btrfs ENOSPC issue.
>>>>>>> 	
>>>>>>>      tests/generic/744     | 124 ++++++++++++++++++++++++++++++++++++++++++
>>>>>>>      tests/generic/744.out |   6 ++
>>>>>>>      2 files changed, 130 insertions(+)
>>>>>>>      create mode 100755 tests/generic/744
>>>>>>>      create mode 100644 tests/generic/744.out
>>>>>>>
>>>>>>> diff --git a/tests/generic/744 b/tests/generic/744
>>>>>>> new file mode 100755
>>>>>>> index 000000000000..2c7ab76bf8b1
>>>>>>> --- /dev/null
>>>>>>> +++ b/tests/generic/744
>>>>>>> @@ -0,0 +1,124 @@
>>>>>>> +#! /bin/bash
>>>>>>> +# SPDX-License-Identifier: GPL-2.0
>>>>>>> +# Copyright (c) 2024 Western Digital Corporation.  All Rights Reserved.
>>>>>>> +#
>>>>>>> +# FS QA Test No. 744
>>>>>>> +#
>>>>>>> +# Inspired by btrfs/273 and generic/015
>>>>>>> +#
>>>>>>> +# This test stresses garbage collection in file systems
>>>>>>> +# by first filling up a scratch mount to a specific usage point with
>>>>>>> +# files of random size, then doing overwrites in parallel with
>>>>>>> +# deletes to fragment the backing zones, forcing reclaim.
>>>>>>> +
>>>>>>> +. ./common/preamble
>>>>>>> +_begin_fstest auto
>>>>>>> +
>>>>>>> +# real QA test starts here
>>>>>>> +
>>>>>>> +_require_scratch
>>>>>>> +
>>>>>>> +# This test requires specific data space usage, skip if we have compression
>>>>>>> +# enabled.
>>>>>>> +_require_no_compress
>>>>>>> +
>>>>>>> +M=$((1024 * 1024))
>>>>>>> +min_fsz=$((1 * ${M}))
>>>>>>> +max_fsz=$((256 * ${M}))
>>>>>>> +bs=${M}
>>>>>>> +fill_percent=95
>>>>>>> +overwrite_percentage=20
>>>>>>> +seq=0
>>>>>>> +
>>>>>>> +_create_file() {
>>>>>>> +	local file_name=${SCRATCH_MNT}/data_$1
>>>>>>> +	local file_sz=$2
>>>>>>> +	local dd_extra=$3
>>>>>>> +
>>>>>>> +	POSIXLY_CORRECT=yes dd if=/dev/zero of=${file_name} \
>>>>>>> +		bs=${bs} count=$(( $file_sz / ${bs} )) \
>>>>>>> +		status=none $dd_extra  2>&1
>>>>>>> +
>>>>>>> +	status=$?
>>>>>>> +	if [ $status -ne 0 ]; then
>>>>>>> +		echo "Failed writing $file_name" >>$seqres.full
>>>>>>> +		exit
>>>>>>> +	fi
>>>>>>> +}
>>>>>
>>>>> I wonder, is there a particular reason for doing all these file
>>>>> operations with shell code instead of using fsstress to create and
>>>>> delete files to fill the fs and stress all the zone-gc code?  This test
>>>>> reminds me a lot of generic/476 but with more fork()ing.
>>>>
>>>> /me has the same confusion. Can this test cover more things than using
>>>> fsstress (to do reclaim test) ? Or does it uncover some known bugs which
>>>> other cases can't?
>>>
>>> ah, adding some more background is probably useful:
>>>
>>> I've been using this test to stress the crap out the zoned xfs garbage
>>> collection / write throttling implementation for zoned rt subvolumes
>>> support in xfs and it has found a number of issues during implementation
>>> that i did not reproduce by other means.
>>>
>>> I think it also has wider applicability as it triggers bugs in btrfs.
>>> f2fs passes without issues, but probably benefits from a quick smoke gc
>>> test as well. Discussed this with Bart and Daeho (now in cc) before
>>> submitting.
>>>
>>> Using fsstress would be cool, but as far as I can tell it cannot
>>> be told to operate at a specific file system usage point, which
>>> is a key thing for this test.
>>
>> As a random test case, if this case can be transformed to use fsstress to cover
>> same issues, that would be nice.
>>
>> But if as a regression test case, it has its particular test coverage, and the
>> issue it covered can't be reproduced by fsstress way, then let's work on this
>> bash script one.
>>
>> Any thoughts?
> 
> Yeah, I think bash is preferable for this particular test case.
> Bash also makes it easy to hack for people's private uses.
> 
> I use longer versions of this test (increasing overwrite_percentage)
> for weekly testing.
> 
> If we need fsstress for reproducing any future gc bug we can add
> whats missing to it then.
> 
> Does that make sense?
> 

Hey Zorro,

Any remaining concerns for adding this test? I could run it across
more file systems (bcachefs could be interesting) and share the results
if need be.

Thanks,
Hans
Zorro Lang May 8, 2024, 8:51 a.m. UTC | #8
On Wed, May 08, 2024 at 07:08:01AM +0000, Hans Holmberg wrote:
> On 2024-04-17 16:50, Hans Holmberg wrote:
> > On 2024-04-17 16:07, Zorro Lang wrote:
> >> On Wed, Apr 17, 2024 at 01:21:39PM +0000, Hans Holmberg wrote:
> >>> On 2024-04-17 14:43, Zorro Lang wrote:
> >>>> On Tue, Apr 16, 2024 at 11:54:37AM -0700, Darrick J. Wong wrote:
> >>>>> On Tue, Apr 16, 2024 at 09:07:43AM +0000, Hans Holmberg wrote:
> >>>>>> +Zorro (doh!)
> >>>>>>
> >>>>>> On 2024-04-15 13:23, Hans Holmberg wrote:
> >>>>>>> This test stresses garbage collection for file systems by first filling
> >>>>>>> up a scratch mount to a specific usage point with files of random size,
> >>>>>>> then doing overwrites in parallel with deletes to fragment the backing
> >>>>>>> storage, forcing reclaim.
> >>>>>>>
> >>>>>>> Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
> >>>>>>> ---
> >>>>>>>
> >>>>>>> Test results in my setup (kernel 6.8.0-rc4+)
> >>>>>>> 	f2fs on zoned nullblk: pass (77s)
> >>>>>>> 	f2fs on conventional nvme ssd: pass (13s)
> >>>>>>> 	btrfs on zoned nublk: fails (-ENOSPC)
> >>>>>>> 	btrfs on conventional nvme ssd: fails (-ENOSPC)
> >>>>>>> 	xfs on conventional nvme ssd: pass (8s)
> >>>>>>>
> >>>>>>> Johannes(cc) is working on the btrfs ENOSPC issue.
> >>>>>>> 	
> >>>>>>>      tests/generic/744     | 124 ++++++++++++++++++++++++++++++++++++++++++
> >>>>>>>      tests/generic/744.out |   6 ++
> >>>>>>>      2 files changed, 130 insertions(+)
> >>>>>>>      create mode 100755 tests/generic/744
> >>>>>>>      create mode 100644 tests/generic/744.out
> >>>>>>>
> >>>>>>> diff --git a/tests/generic/744 b/tests/generic/744
> >>>>>>> new file mode 100755
> >>>>>>> index 000000000000..2c7ab76bf8b1
> >>>>>>> --- /dev/null
> >>>>>>> +++ b/tests/generic/744
> >>>>>>> @@ -0,0 +1,124 @@
> >>>>>>> +#! /bin/bash
> >>>>>>> +# SPDX-License-Identifier: GPL-2.0
> >>>>>>> +# Copyright (c) 2024 Western Digital Corporation.  All Rights Reserved.
> >>>>>>> +#
> >>>>>>> +# FS QA Test No. 744
> >>>>>>> +#
> >>>>>>> +# Inspired by btrfs/273 and generic/015
> >>>>>>> +#
> >>>>>>> +# This test stresses garbage collection in file systems
> >>>>>>> +# by first filling up a scratch mount to a specific usage point with
> >>>>>>> +# files of random size, then doing overwrites in parallel with
> >>>>>>> +# deletes to fragment the backing zones, forcing reclaim.
> >>>>>>> +
> >>>>>>> +. ./common/preamble
> >>>>>>> +_begin_fstest auto
> >>>>>>> +
> >>>>>>> +# real QA test starts here
> >>>>>>> +
> >>>>>>> +_require_scratch
> >>>>>>> +
> >>>>>>> +# This test requires specific data space usage, skip if we have compression
> >>>>>>> +# enabled.
> >>>>>>> +_require_no_compress
> >>>>>>> +
> >>>>>>> +M=$((1024 * 1024))
> >>>>>>> +min_fsz=$((1 * ${M}))
> >>>>>>> +max_fsz=$((256 * ${M}))
> >>>>>>> +bs=${M}
> >>>>>>> +fill_percent=95
> >>>>>>> +overwrite_percentage=20
> >>>>>>> +seq=0
> >>>>>>> +
> >>>>>>> +_create_file() {
> >>>>>>> +	local file_name=${SCRATCH_MNT}/data_$1
> >>>>>>> +	local file_sz=$2
> >>>>>>> +	local dd_extra=$3
> >>>>>>> +
> >>>>>>> +	POSIXLY_CORRECT=yes dd if=/dev/zero of=${file_name} \
> >>>>>>> +		bs=${bs} count=$(( $file_sz / ${bs} )) \
> >>>>>>> +		status=none $dd_extra  2>&1
> >>>>>>> +
> >>>>>>> +	status=$?
> >>>>>>> +	if [ $status -ne 0 ]; then
> >>>>>>> +		echo "Failed writing $file_name" >>$seqres.full
> >>>>>>> +		exit
> >>>>>>> +	fi
> >>>>>>> +}
> >>>>>
> >>>>> I wonder, is there a particular reason for doing all these file
> >>>>> operations with shell code instead of using fsstress to create and
> >>>>> delete files to fill the fs and stress all the zone-gc code?  This test
> >>>>> reminds me a lot of generic/476 but with more fork()ing.
> >>>>
> >>>> /me has the same confusion. Can this test cover more things than using
> >>>> fsstress (to do reclaim test) ? Or does it uncover some known bugs which
> >>>> other cases can't?
> >>>
> >>> ah, adding some more background is probably useful:
> >>>
> >>> I've been using this test to stress the crap out the zoned xfs garbage
> >>> collection / write throttling implementation for zoned rt subvolumes
> >>> support in xfs and it has found a number of issues during implementation
> >>> that i did not reproduce by other means.
> >>>
> >>> I think it also has wider applicability as it triggers bugs in btrfs.
> >>> f2fs passes without issues, but probably benefits from a quick smoke gc
> >>> test as well. Discussed this with Bart and Daeho (now in cc) before
> >>> submitting.
> >>>
> >>> Using fsstress would be cool, but as far as I can tell it cannot
> >>> be told to operate at a specific file system usage point, which
> >>> is a key thing for this test.
> >>
> >> As a random test case, if this case can be transformed to use fsstress to cover
> >> same issues, that would be nice.
> >>
> >> But if as a regression test case, it has its particular test coverage, and the
> >> issue it covered can't be reproduced by fsstress way, then let's work on this
> >> bash script one.
> >>
> >> Any thoughts?
> > 
> > Yeah, I think bash is preferable for this particular test case.
> > Bash also makes it easy to hack for people's private uses.
> > 
> > I use longer versions of this test (increasing overwrite_percentage)
> > for weekly testing.
> > 
> > If we need fsstress for reproducing any future gc bug we can add
> > whats missing to it then.
> > 
> > Does that make sense?
> > 
> 
> Hey Zorro,
> 
> Any remaining concerns for adding this test? I could run it across
> more file systems(bcachefs could be interesting) and share the results 
> if needed be.

Hi,

I remembered you mentioned btrfs fails on this test, and I can reproduce it
on btrfs [1] with a general disk. Have you figured out the reason? I don't
want to give btrfs a test failure suddenly without a proper explanation :)
If it's a case issue, better to fix it for btrfs.

Thanks,
Zorro

# ./check generic/744
FSTYP         -- btrfs
PLATFORM      -- Linux/x86_64 hp-dl380pg8-01 6.9.0-0.rc5.20240425gite88c4cfcb7b8.47.fc41.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Apr 25 14:21:52 UTC 2024
MKFS_OPTIONS  -- /dev/sda4
MOUNT_OPTIONS -- -o context=system_u:object_r:root_t:s0 /dev/sda4 /mnt/scratch

generic/744 115s ... [failed, exit status 1]- output mismatch (see /root/git/xfstests/results//generic/744.out.bad)
    --- tests/generic/744.out   2024-05-08 16:11:14.476635417 +0800
    +++ /root/git/xfstests/results//generic/744.out.bad 2024-05-08 16:46:03.617194377 +0800
    @@ -2,5 +2,4 @@
     Starting fillup using direct IO
     Starting mixed write/delete test using direct IO
     Starting mixed write/delete test using buffered IO
    -Syncing
    -Done, all good
    +dd: error writing '/mnt/scratch/data_82': No space left on device
    ...
    (Run 'diff -u /root/git/xfstests/tests/generic/744.out /root/git/xfstests/results//generic/744.out.bad'  to see the entire diff)
Ran: generic/744
Failures: generic/744
Failed 1 of 1 tests

> 
> Thanks,
> Hans
Qu Wenruo May 8, 2024, 9:28 a.m. UTC | #9
在 2024/5/8 18:21, Zorro Lang 写道:
[...]
>>>
>>
>> Hey Zorro,
>>
>> Any remaining concerns for adding this test? I could run it across
>> more file systems(bcachefs could be interesting) and share the results
>> if needed be.
>
> Hi,
>
> I remembered you metioned btrfs fails on this test, and I can reproduce it
> on btrfs [1] with general disk. Have you figured out the reason? I don't
> want to give btrfs a test failure suddently without a proper explanation :)
> If it's a case issue, better to fix it for btrfs.
>
> Thanks,
> Zorro
>
> # ./check generic/744
> FSTYP         -- btrfs
> PLATFORM      -- Linux/x86_64 hp-dl380pg8-01 6.9.0-0.rc5.20240425gite88c4cfcb7b8.47.fc41.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Apr 25 14:21:52 UTC 2024
> MKFS_OPTIONS  -- /dev/sda4
> MOUNT_OPTIONS -- -o context=system_u:object_r:root_t:s0 /dev/sda4 /mnt/scratch
>
> generic/744 115s ... [failed, exit status 1]- output mismatch (see /root/git/xfstests/results//generic/744.out.bad)
>      --- tests/generic/744.out   2024-05-08 16:11:14.476635417 +0800
>      +++ /root/git/xfstests/results//generic/744.out.bad 2024-05-08 16:46:03.617194377 +0800
>      @@ -2,5 +2,4 @@
>       Starting fillup using direct IO
>       Starting mixed write/delete test using direct IO
>       Starting mixed write/delete test using buffered IO
>      -Syncing
>      -Done, all good
>      +dd: error writing '/mnt/scratch/data_82': No space left on device

[POSSIBLE CAUSE]
Not an expert on zoned support, but even with the 95% fill rate setup,
the test case can still completely fill up the btrfs data space, thus no
more data can be written.

My guess is, the available space has taken some metadata space into
consideration, thus at the end of the final available bytes of data
space, the `stat -f -c '%a'` still reports some value larger than 5%.

But as long as the data space is full filled up, btrfs notice that there
is no way to allocate more data, thus reports its available bytes as 0.

This means, the available space report is always beyond 5%, then
suddenly dropped to 0, causing the test script to fail.

Unfortunately I do not have any good idea that can easily solve the
problem. Due to the nature of dynamic block groups allocation, the
available/free space reporting is always not that reliable.

[WORKAROUND?]
I'm just wondering whether it's possible to fill up the fs to 100%
(hitting ENOSPC), then just remove 5% of all the files to emulate a 95%
filled-up fs?

By this, it can be a more accurate way to emulate 95% used data space,
without relying on the fs specific available space reporting.

Thanks,
Qu
>      ...
>      (Run 'diff -u /root/git/xfstests/tests/generic/744.out /root/git/xfstests/results//generic/744.out.bad'  to see the entire diff)
> Ran: generic/744
> Failures: generic/744
> Failed 1 of 1 tests
>
>>
>> Thanks,
>> Hans
>
>
Johannes Thumshirn May 8, 2024, 11:02 a.m. UTC | #10
On 08.05.24 11:28, Qu Wenruo wrote:
> 
> 
> 在 2024/5/8 18:21, Zorro Lang 写道:
> [...]
>>>>
>>>
>>> Hey Zorro,
>>>
>>> Any remaining concerns for adding this test? I could run it across
>>> more file systems(bcachefs could be interesting) and share the results
>>> if needed be.
>>
>> Hi,
>>
>> I remembered you metioned btrfs fails on this test, and I can reproduce it
>> on btrfs [1] with general disk. Have you figured out the reason? I don't
>> want to give btrfs a test failure suddently without a proper explanation :)
>> If it's a case issue, better to fix it for btrfs.
>>
>> Thanks,
>> Zorro
>>
>> # ./check generic/744
>> FSTYP         -- btrfs
>> PLATFORM      -- Linux/x86_64 hp-dl380pg8-01 6.9.0-0.rc5.20240425gite88c4cfcb7b8.47.fc41.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Apr 25 14:21:52 UTC 2024
>> MKFS_OPTIONS  -- /dev/sda4
>> MOUNT_OPTIONS -- -o context=system_u:object_r:root_t:s0 /dev/sda4 /mnt/scratch
>>
>> generic/744 115s ... [failed, exit status 1]- output mismatch (see /root/git/xfstests/results//generic/744.out.bad)
>>       --- tests/generic/744.out   2024-05-08 16:11:14.476635417 +0800
>>       +++ /root/git/xfstests/results//generic/744.out.bad 2024-05-08 16:46:03.617194377 +0800
>>       @@ -2,5 +2,4 @@
>>        Starting fillup using direct IO
>>        Starting mixed write/delete test using direct IO
>>        Starting mixed write/delete test using buffered IO
>>       -Syncing
>>       -Done, all good
>>       +dd: error writing '/mnt/scratch/data_82': No space left on device
> 
> [POSSIBLE CAUSE]
> Not an expert on zoned support, but even with the 95% fill rate setup,
> the test case still go fully filled btrfs data, thus no more data can be
> written.

Yes I /think/ Zorro's report above is with a regular (i.e. non-zoned) setup.

> My guess is, the available space has taken some metadata space into
> consideration, thus at the end of the final available bytes of data
> space, the `stat -f -c '%a'` still reports some value larger than 5%.
> 
> But as long as the data space is full filled up, btrfs notice that there
> is no way to allocate more data, thus reports its available bytes as 0.
> 
> This means, the available space report is always beyond 5%, then
> suddenly dropped to 0, causing the test script to fail.
> 
> Unfortunately I do not have any good idea that can easily solve the
> problem. Due to the nature of dynamic block groups allocation, the
> available/free space reporting is always not that reliable.
> 
> [WORKAROUND?]
> I'm just wondering if it's possible that, can we fill up the fs to 100%
> (hitting ENOSPC), then just remove 5% of all the files to emulate 95%
> filled up fs?
> 
> By this, it can be a more accurate way to emulate 95% used data space,
> without relying on the fs specific available space reporting.

This won't work on zoned though. If we fill to 100% and then remove 5% 
we'd still need to run balance/gc to really free up that 5%.

And there comes a 2nd problem, for zoned we need to reserve at least one 
block-group as a relocation target (I did send an RFC patch for that a 
while ago [1]).

[1] 
https://lore.kernel.org/linux-btrfs/1480374e3f65371d4b857fb45a3fd9f6a5fa4a25.1713357984.git.jth@kernel.org/
Christoph Hellwig May 9, 2024, 5:43 a.m. UTC | #11
[really annoying multi-level full quote snipped]

On Wed, May 08, 2024 at 04:51:35PM +0800, Zorro Lang wrote:
> I remembered you metioned btrfs fails on this test, and I can reproduce it
> on btrfs [1] with general disk. Have you figured out the reason? I don't
> want to give btrfs a test failure suddently without a proper explanation :)
> If it's a case issue, better to fix it for btrfs.

As a rule of thumb, what do we about generally useful tests that fail
on a fs due to fs bugs?  Not adding the test seems a bit counter productive.
Do we need a

_expected_failure $FSTYP

helper to annotate them instead of blocking the test?
Zorro Lang May 9, 2024, 9:42 a.m. UTC | #12
On Thu, May 09, 2024 at 07:43:47AM +0200, hch@lst.de wrote:
> [really annoying multi-level full quote snipped]
> 
> On Wed, May 08, 2024 at 04:51:35PM +0800, Zorro Lang wrote:
> > I remembered you metioned btrfs fails on this test, and I can reproduce it
> > on btrfs [1] with general disk. Have you figured out the reason? I don't
> > want to give btrfs a test failure suddently without a proper explanation :)
> > If it's a case issue, better to fix it for btrfs.
> 
> As a rule of thumb, what do we about generally useful tests that fail
> on a fs due to fs bugs?  Not adding the test seems a bit counter productive.
> Do we need a
> 
> _expected_failure $FSTYP
> 
> helper to annotate them instead of blocking the test?

Hmm, what kind of situation is this _expected_failure for?

For now we have two methods to deal with a test failure:

1) If a test always fails on a fs, and can't be fixed (in case or kernel). We can
add this fs type into black list of the case, e.g. _supported_fs ^$fstype

2) If a test fails on a fs as an expected bug? We have _fixed_by_xxx ... or
_wants_xxx_commit helpers to record that.

3) Besides that, I generally mention some new failures in the [ANNOUNCE] email
of each release. (That's the last way I can choose).

I hope we can fix the obvious case issue in reviewing phase, or deal with the
failure by 1) or 2). For this patch, I think we can find a way to avoid the
failure for btrfs, or let this test "not supported" by btrfs. Or any other
better ideas :)

Thanks,
Zorro

>
Christoph Hellwig May 9, 2024, 12:54 p.m. UTC | #13
On Thu, May 09, 2024 at 05:42:08PM +0800, Zorro Lang wrote:
> Hmm, what kind of situation is this _expected_failure for?

Well, the one we are talking about here.  We have a new and useful
test, and a file systems fails it because it has a bug.

Personally I'd be fine with just letting it fail, but you seemed to
indicate that this is a reason to not merge the test yet.

> I hope we can fix the obvious case issue in reviewing phase, or deal with the
> failure by 1) or 2). For this patch, I think we can find a way to avoid the
> failure for btrfs, or let this test "not supported" by btrfs. Or any other
> better ideas :)

It is a normal use case that every file system should handle and btrfs
developers are looking into it, but it might take a while.
Zorro Lang May 10, 2024, 3:21 a.m. UTC | #14
On Thu, May 09, 2024 at 02:54:12PM +0200, hch@lst.de wrote:
> On Thu, May 09, 2024 at 05:42:08PM +0800, Zorro Lang wrote:
> > Hmm, what kind of situation is this _expected_failure for?
> 
> Well, the one we are talking about here.  We have a new and useful
> test, and a file systems fails it because it has a bug.
> 
> Personally I'd be fine with just letting it fail, but you seemed to
> indicate that this is a reason to not merge the test yet.

The failure itself is not the reason to not merge :) It's not clear
what this case tests for, especially as there's a failure. If it's a
regression test case, we can mark the kernel commit.

Or if we treat it as a simple stress test for "garbage collection in
file systems", does it bring in more test coverage? As "garbage
collection" is common, most random stress test cases cover that.
But sure, I can treat it as a generic version of btrfs/273. It's copied
from a btrfs case, yet fails on btrfs. So I hope to know what's wrong :)

> 
> > I hope we can fix the obvious case issue in reviewing phase, or deal with the
> > failure by 1) or 2). For this patch, I think we can find a way to avoid the
> > failure for btrfs, or let this test "not supported" by btrfs. Or any other
> > better ideas :)
> 
> It is a normal use case that every file system should handle and btrfs
> developers are looking into it, but it might take a while.

If it needs a longer time to fix, and if the btrfs list has known about and doesn't mind
this failure, I can merge it into "patches-in-queue" branch at first. If we
find a way to fix it before next release, let's fix, or I'll push it. Does that
make sense to you?
(CC btrfs list)

Thanks,
Zorro

>
Hans Holmberg May 11, 2024, 1:08 p.m. UTC | #15
On 2024-05-08 10:51, Zorro Lang wrote:
> On Wed, May 08, 2024 at 07:08:01AM +0000, Hans Holmberg wrote:
>> On 2024-04-17 16:50, Hans Holmberg wrote:
>>> On 2024-04-17 16:07, Zorro Lang wrote:
>>>> On Wed, Apr 17, 2024 at 01:21:39PM +0000, Hans Holmberg wrote:
>>>>> On 2024-04-17 14:43, Zorro Lang wrote:
>>>>>> On Tue, Apr 16, 2024 at 11:54:37AM -0700, Darrick J. Wong wrote:
>>>>>>> On Tue, Apr 16, 2024 at 09:07:43AM +0000, Hans Holmberg wrote:
>>>>>>>> +Zorro (doh!)
>>>>>>>>
>>>>>>>> On 2024-04-15 13:23, Hans Holmberg wrote:
>>>>>>>>> This test stresses garbage collection for file systems by first filling
>>>>>>>>> up a scratch mount to a specific usage point with files of random size,
>>>>>>>>> then doing overwrites in parallel with deletes to fragment the backing
>>>>>>>>> storage, forcing reclaim.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
>>>>>>>>> ---
>>>>>>>>>
>>>>>>>>> Test results in my setup (kernel 6.8.0-rc4+)
>>>>>>>>> 	f2fs on zoned nullblk: pass (77s)
>>>>>>>>> 	f2fs on conventional nvme ssd: pass (13s)
>>>>>>>>> 	btrfs on zoned nublk: fails (-ENOSPC)
>>>>>>>>> 	btrfs on conventional nvme ssd: fails (-ENOSPC)
>>>>>>>>> 	xfs on conventional nvme ssd: pass (8s)
>>>>>>>>>
>>>>>>>>> Johannes(cc) is working on the btrfs ENOSPC issue.
>>>>>>>>> 	
>>>>>>>>>       tests/generic/744     | 124 ++++++++++++++++++++++++++++++++++++++++++
>>>>>>>>>       tests/generic/744.out |   6 ++
>>>>>>>>>       2 files changed, 130 insertions(+)
>>>>>>>>>       create mode 100755 tests/generic/744
>>>>>>>>>       create mode 100644 tests/generic/744.out
>>>>>>>>>
>>>>>>>>> diff --git a/tests/generic/744 b/tests/generic/744
>>>>>>>>> new file mode 100755
>>>>>>>>> index 000000000000..2c7ab76bf8b1
>>>>>>>>> --- /dev/null
>>>>>>>>> +++ b/tests/generic/744
>>>>>>>>> @@ -0,0 +1,124 @@
>>>>>>>>> +#! /bin/bash
>>>>>>>>> +# SPDX-License-Identifier: GPL-2.0
>>>>>>>>> +# Copyright (c) 2024 Western Digital Corporation.  All Rights Reserved.
>>>>>>>>> +#
>>>>>>>>> +# FS QA Test No. 744
>>>>>>>>> +#
>>>>>>>>> +# Inspired by btrfs/273 and generic/015
>>>>>>>>> +#
>>>>>>>>> +# This test stresses garbage collection in file systems
>>>>>>>>> +# by first filling up a scratch mount to a specific usage point with
>>>>>>>>> +# files of random size, then doing overwrites in parallel with
>>>>>>>>> +# deletes to fragment the backing zones, forcing reclaim.
>>>>>>>>> +
>>>>>>>>> +. ./common/preamble
>>>>>>>>> +_begin_fstest auto
>>>>>>>>> +
>>>>>>>>> +# real QA test starts here
>>>>>>>>> +
>>>>>>>>> +_require_scratch
>>>>>>>>> +
>>>>>>>>> +# This test requires specific data space usage, skip if we have compression
>>>>>>>>> +# enabled.
>>>>>>>>> +_require_no_compress
>>>>>>>>> +
>>>>>>>>> +M=$((1024 * 1024))
>>>>>>>>> +min_fsz=$((1 * ${M}))
>>>>>>>>> +max_fsz=$((256 * ${M}))
>>>>>>>>> +bs=${M}
>>>>>>>>> +fill_percent=95
>>>>>>>>> +overwrite_percentage=20
>>>>>>>>> +seq=0
>>>>>>>>> +
>>>>>>>>> +_create_file() {
>>>>>>>>> +	local file_name=${SCRATCH_MNT}/data_$1
>>>>>>>>> +	local file_sz=$2
>>>>>>>>> +	local dd_extra=$3
>>>>>>>>> +
>>>>>>>>> +	POSIXLY_CORRECT=yes dd if=/dev/zero of=${file_name} \
>>>>>>>>> +		bs=${bs} count=$(( $file_sz / ${bs} )) \
>>>>>>>>> +		status=none $dd_extra  2>&1
>>>>>>>>> +
>>>>>>>>> +	status=$?
>>>>>>>>> +	if [ $status -ne 0 ]; then
>>>>>>>>> +		echo "Failed writing $file_name" >>$seqres.full
>>>>>>>>> +		exit
>>>>>>>>> +	fi
>>>>>>>>> +}
>>>>>>>
>>>>>>> I wonder, is there a particular reason for doing all these file
>>>>>>> operations with shell code instead of using fsstress to create and
>>>>>>> delete files to fill the fs and stress all the zone-gc code?  This test
>>>>>>> reminds me a lot of generic/476 but with more fork()ing.
>>>>>>
>>>>>> /me has the same confusion. Can this test cover more things than using
>>>>>> fsstress (to do reclaim test) ? Or does it uncover some known bugs which
>>>>>> other cases can't?
>>>>>
>>>>> ah, adding some more background is probably useful:
>>>>>
>>>>> I've been using this test to stress the crap out the zoned xfs garbage
>>>>> collection / write throttling implementation for zoned rt subvolumes
>>>>> support in xfs and it has found a number of issues during implementation
>>>>> that i did not reproduce by other means.
>>>>>
>>>>> I think it also has wider applicability as it triggers bugs in btrfs.
>>>>> f2fs passes without issues, but probably benefits from a quick smoke gc
>>>>> test as well. Discussed this with Bart and Daeho (now in cc) before
>>>>> submitting.
>>>>>
>>>>> Using fsstress would be cool, but as far as I can tell it cannot
>>>>> be told to operate at a specific file system usage point, which
>>>>> is a key thing for this test.
>>>>
>>>> As a random test case, if this case can be transformed to use fsstress to cover
>>>> same issues, that would be nice.
>>>>
>>>> But if as a regression test case, it has its particular test coverage, and the
>>>> issue it covered can't be reproduced by fsstress way, then let's work on this
>>>> bash script one.
>>>>
>>>> Any thoughts?
>>>
>>> Yeah, I think bash is preferable for this particular test case.
>>> Bash also makes it easy to hack for people's private uses.
>>>
>>> I use longer versions of this test (increasing overwrite_percentage)
>>> for weekly testing.
>>>
>>> If we need fsstress for reproducing any future gc bug we can add
>>> whats missing to it then.
>>>
>>> Does that make sense?
>>>
>>
>> Hey Zorro,
>>
>> Any remaining concerns for adding this test? I could run it across
>> more file systems(bcachefs could be interesting) and share the results
>> if needed be.
> 
> Hi,
> 
> I remembered you metioned btrfs fails on this test, and I can reproduce it
> on btrfs [1] with general disk. Have you figured out the reason? I don't
> want to give btrfs a test failure suddently without a proper explanation :)
> If it's a case issue, better to fix it for btrfs.


I was surprised to see the failure for btrfs on a conventional block
device, but have not dug into it. I suspect/assume it's the same root
cause as the issue Johannes is looking into when using a zoned block
device as backing storage.

I debugged that a bit with Johannes, and noticed that if I manually
kick btrfs rebalancing after each write via sysfs, the test progresses
further (but super slow).

So *I think* that btrfs needs to:

* tune the triggering of gc to kick in way before available free space
   runs out
* start slowing down / blocking writes when reclaim pressure is high to
   avoid premature -ENOSPC:es.

It's a pretty nasty problem, as potentially any write could -ENOSPC
long before the reported available space runs out when a workload
ends up fragmenting the disk and write pressure is high..


Thanks,
Hans (back from a couple of days away from email)



> 
> Thanks,
> Zorro
> 
> # ./check generic/744
> FSTYP         -- btrfs
> PLATFORM      -- Linux/x86_64 hp-dl380pg8-01 6.9.0-0.rc5.20240425gite88c4cfcb7b8.47.fc41.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Apr 25 14:21:52 UTC 2024
> MKFS_OPTIONS  -- /dev/sda4
> MOUNT_OPTIONS -- -o context=system_u:object_r:root_t:s0 /dev/sda4 /mnt/scratch
> 
> generic/744 115s ... [failed, exit status 1]- output mismatch (see /root/git/xfstests/results//generic/744.out.bad)
>      --- tests/generic/744.out   2024-05-08 16:11:14.476635417 +0800
>      +++ /root/git/xfstests/results//generic/744.out.bad 2024-05-08 16:46:03.617194377 +0800
>      @@ -2,5 +2,4 @@
>       Starting fillup using direct IO
>       Starting mixed write/delete test using direct IO
>       Starting mixed write/delete test using buffered IO
>      -Syncing
>      -Done, all good
>      +dd: error writing '/mnt/scratch/data_82': No space left on device
>      ...
>      (Run 'diff -u /root/git/xfstests/tests/generic/744.out /root/git/xfstests/results//generic/744.out.bad'  to see the entire diff)
> Ran: generic/744
> Failures: generic/744
> Failed 1 of 1 tests
> 
>>
>> Thanks,
>> Hans
> 
>
Johannes Thumshirn May 12, 2024, 4:54 p.m. UTC | #16
[ +CC Boris ]
On 11.05.24 07:08, Hans Holmberg wrote:
> On 2024-05-08 10:51, Zorro Lang wrote:
>> On Wed, May 08, 2024 at 07:08:01AM +0000, Hans Holmberg wrote:
>>> On 2024-04-17 16:50, Hans Holmberg wrote:
>>>> On 2024-04-17 16:07, Zorro Lang wrote:
>>>>> On Wed, Apr 17, 2024 at 01:21:39PM +0000, Hans Holmberg wrote:
>>>>>> On 2024-04-17 14:43, Zorro Lang wrote:
>>>>>>> On Tue, Apr 16, 2024 at 11:54:37AM -0700, Darrick J. Wong wrote:
>>>>>>>> On Tue, Apr 16, 2024 at 09:07:43AM +0000, Hans Holmberg wrote:
>>>>>>>>> +Zorro (doh!)
>>>>>>>>>
>>>>>>>>> On 2024-04-15 13:23, Hans Holmberg wrote:
>>>>>>>>>> This test stresses garbage collection for file systems by first filling
>>>>>>>>>> up a scratch mount to a specific usage point with files of random size,
>>>>>>>>>> then doing overwrites in parallel with deletes to fragment the backing
>>>>>>>>>> storage, forcing reclaim.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
>>>>>>>>>> ---
>>>>>>>>>>
>>>>>>>>>> Test results in my setup (kernel 6.8.0-rc4+)
>>>>>>>>>> 	f2fs on zoned nullblk: pass (77s)
>>>>>>>>>> 	f2fs on conventional nvme ssd: pass (13s)
>>>>>>>>>> 	btrfs on zoned nublk: fails (-ENOSPC)
>>>>>>>>>> 	btrfs on conventional nvme ssd: fails (-ENOSPC)
>>>>>>>>>> 	xfs on conventional nvme ssd: pass (8s)
>>>>>>>>>>
>>>>>>>>>> Johannes(cc) is working on the btrfs ENOSPC issue.
>>>>>>>>>> 	
>>>>>>>>>>        tests/generic/744     | 124 ++++++++++++++++++++++++++++++++++++++++++
>>>>>>>>>>        tests/generic/744.out |   6 ++
>>>>>>>>>>        2 files changed, 130 insertions(+)
>>>>>>>>>>        create mode 100755 tests/generic/744
>>>>>>>>>>        create mode 100644 tests/generic/744.out
>>>>>>>>>>
>>>>>>>>>> diff --git a/tests/generic/744 b/tests/generic/744
>>>>>>>>>> new file mode 100755
>>>>>>>>>> index 000000000000..2c7ab76bf8b1
>>>>>>>>>> --- /dev/null
>>>>>>>>>> +++ b/tests/generic/744
>>>>>>>>>> @@ -0,0 +1,124 @@
>>>>>>>>>> +#! /bin/bash
>>>>>>>>>> +# SPDX-License-Identifier: GPL-2.0
>>>>>>>>>> +# Copyright (c) 2024 Western Digital Corporation.  All Rights Reserved.
>>>>>>>>>> +#
>>>>>>>>>> +# FS QA Test No. 744
>>>>>>>>>> +#
>>>>>>>>>> +# Inspired by btrfs/273 and generic/015
>>>>>>>>>> +#
>>>>>>>>>> +# This test stresses garbage collection in file systems
>>>>>>>>>> +# by first filling up a scratch mount to a specific usage point with
>>>>>>>>>> +# files of random size, then doing overwrites in parallel with
>>>>>>>>>> +# deletes to fragment the backing zones, forcing reclaim.
>>>>>>>>>> +
>>>>>>>>>> +. ./common/preamble
>>>>>>>>>> +_begin_fstest auto
>>>>>>>>>> +
>>>>>>>>>> +# real QA test starts here
>>>>>>>>>> +
>>>>>>>>>> +_require_scratch
>>>>>>>>>> +
>>>>>>>>>> +# This test requires specific data space usage, skip if we have compression
>>>>>>>>>> +# enabled.
>>>>>>>>>> +_require_no_compress
>>>>>>>>>> +
>>>>>>>>>> +M=$((1024 * 1024))
>>>>>>>>>> +min_fsz=$((1 * ${M}))
>>>>>>>>>> +max_fsz=$((256 * ${M}))
>>>>>>>>>> +bs=${M}
>>>>>>>>>> +fill_percent=95
>>>>>>>>>> +overwrite_percentage=20
>>>>>>>>>> +seq=0
>>>>>>>>>> +
>>>>>>>>>> +_create_file() {
>>>>>>>>>> +	local file_name=${SCRATCH_MNT}/data_$1
>>>>>>>>>> +	local file_sz=$2
>>>>>>>>>> +	local dd_extra=$3
>>>>>>>>>> +
>>>>>>>>>> +	POSIXLY_CORRECT=yes dd if=/dev/zero of=${file_name} \
>>>>>>>>>> +		bs=${bs} count=$(( $file_sz / ${bs} )) \
>>>>>>>>>> +		status=none $dd_extra  2>&1
>>>>>>>>>> +
>>>>>>>>>> +	status=$?
>>>>>>>>>> +	if [ $status -ne 0 ]; then
>>>>>>>>>> +		echo "Failed writing $file_name" >>$seqres.full
>>>>>>>>>> +		exit
>>>>>>>>>> +	fi
>>>>>>>>>> +}
>>>>>>>>
>>>>>>>> I wonder, is there a particular reason for doing all these file
>>>>>>>> operations with shell code instead of using fsstress to create and
>>>>>>>> delete files to fill the fs and stress all the zone-gc code?  This test
>>>>>>>> reminds me a lot of generic/476 but with more fork()ing.
>>>>>>>
>>>>>>> /me has the same confusion. Can this test cover more things than using
>>>>>>> fsstress (to do reclaim test) ? Or does it uncover some known bugs which
>>>>>>> other cases can't?
>>>>>>
>>>>>> ah, adding some more background is probably useful:
>>>>>>
>>>>>> I've been using this test to stress the crap out the zoned xfs garbage
>>>>>> collection / write throttling implementation for zoned rt subvolumes
>>>>>> support in xfs and it has found a number of issues during implementation
>>>>>> that i did not reproduce by other means.
>>>>>>
>>>>>> I think it also has wider applicability as it triggers bugs in btrfs.
>>>>>> f2fs passes without issues, but probably benefits from a quick smoke gc
>>>>>> test as well. Discussed this with Bart and Daeho (now in cc) before
>>>>>> submitting.
>>>>>>
>>>>>> Using fsstress would be cool, but as far as I can tell it cannot
>>>>>> be told to operate at a specific file system usage point, which
>>>>>> is a key thing for this test.
>>>>>
>>>>> As a random test case, if this case can be transformed to use fsstress to cover
>>>>> same issues, that would be nice.
>>>>>
>>>>> But if as a regression test case, it has its particular test coverage, and the
>>>>> issue it covered can't be reproduced by fsstress way, then let's work on this
>>>>> bash script one.
>>>>>
>>>>> Any thoughts?
>>>>
>>>> Yeah, I think bash is preferable for this particular test case.
>>>> Bash also makes it easy to hack for people's private uses.
>>>>
>>>> I use longer versions of this test (increasing overwrite_percentage)
>>>> for weekly testing.
>>>>
>>>> If we need fsstress for reproducing any future gc bug we can add
>>>> whats missing to it then.
>>>>
>>>> Does that make sense?
>>>>
>>>
>>> Hey Zorro,
>>>
>>> Any remaining concerns for adding this test? I could run it across
>>> more file systems(bcachefs could be interesting) and share the results
>>> if needed be.
>>
>> Hi,
>>
>> I remembered you metioned btrfs fails on this test, and I can reproduce it
>> on btrfs [1] with general disk. Have you figured out the reason? I don't
>> want to give btrfs a test failure suddently without a proper explanation :)
>> If it's a case issue, better to fix it for btrfs.
> 
> 
> I was surprised to see the failure for brtrfs on a conventional block
> device, but have not dug into it. I suspect/assume it's the same root
> cause as the issue Johannes is looking into when using a zoned block
> device as backing storage.
> 
> I debugged that a bit with Johannes, and noticed that if I manually
> kick btrfs rebalancing after each write via sysfs, the test progresses
> further (but super slow).
> 
> So *I think* that btrfs needs to:
> 
> * tune the triggering of gc to kick in way before available free space
>     runs out
> * start slowing down / blocking writes when reclaim pressure is high to
>     avoid premature -ENOSPC:es.

Yes both Boris and I are working on different solutions to the GC 
problem. But apart from that, I have the feeling that using stat to 
check on the available space is not the best idea, at least for btrfs.

> It's a pretty nasty problem, as potentially any write could -ENOSPC
> long before the reported available space runs out when a workload
> ends up fragmenting the disk and write pressure is high..
Johannes Thumshirn May 12, 2024, 4:56 p.m. UTC | #17
[ +CC Boris ]
On 11.05.24 07:08, Hans Holmberg wrote:
> On 2024-05-08 10:51, Zorro Lang wrote:
>> On Wed, May 08, 2024 at 07:08:01AM +0000, Hans Holmberg wrote:
>>> On 2024-04-17 16:50, Hans Holmberg wrote:
>>>> On 2024-04-17 16:07, Zorro Lang wrote:
>>>>> On Wed, Apr 17, 2024 at 01:21:39PM +0000, Hans Holmberg wrote:
>>>>>> On 2024-04-17 14:43, Zorro Lang wrote:
>>>>>>> On Tue, Apr 16, 2024 at 11:54:37AM -0700, Darrick J. Wong wrote:
>>>>>>>> On Tue, Apr 16, 2024 at 09:07:43AM +0000, Hans Holmberg wrote:
>>>>>>>>> +Zorro (doh!)
>>>>>>>>>
>>>>>>>>> On 2024-04-15 13:23, Hans Holmberg wrote:
>>>>>>>>>> This test stresses garbage collection for file systems by first filling
>>>>>>>>>> up a scratch mount to a specific usage point with files of random size,
>>>>>>>>>> then doing overwrites in parallel with deletes to fragment the backing
>>>>>>>>>> storage, forcing reclaim.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
>>>>>>>>>> ---
>>>>>>>>>>
>>>>>>>>>> Test results in my setup (kernel 6.8.0-rc4+)
>>>>>>>>>> 	f2fs on zoned nullblk: pass (77s)
>>>>>>>>>> 	f2fs on conventional nvme ssd: pass (13s)
>>>>>>>>>> 	btrfs on zoned nublk: fails (-ENOSPC)
>>>>>>>>>> 	btrfs on conventional nvme ssd: fails (-ENOSPC)
>>>>>>>>>> 	xfs on conventional nvme ssd: pass (8s)
>>>>>>>>>>
>>>>>>>>>> Johannes(cc) is working on the btrfs ENOSPC issue.
>>>>>>>>>> 	
>>>>>>>>>>        tests/generic/744     | 124 ++++++++++++++++++++++++++++++++++++++++++
>>>>>>>>>>        tests/generic/744.out |   6 ++
>>>>>>>>>>        2 files changed, 130 insertions(+)
>>>>>>>>>>        create mode 100755 tests/generic/744
>>>>>>>>>>        create mode 100644 tests/generic/744.out
>>>>>>>>>>
>>>>>>>>>> diff --git a/tests/generic/744 b/tests/generic/744
>>>>>>>>>> new file mode 100755
>>>>>>>>>> index 000000000000..2c7ab76bf8b1
>>>>>>>>>> --- /dev/null
>>>>>>>>>> +++ b/tests/generic/744
>>>>>>>>>> @@ -0,0 +1,124 @@
>>>>>>>>>> +#! /bin/bash
>>>>>>>>>> +# SPDX-License-Identifier: GPL-2.0
>>>>>>>>>> +# Copyright (c) 2024 Western Digital Corporation.  All Rights Reserved.
>>>>>>>>>> +#
>>>>>>>>>> +# FS QA Test No. 744
>>>>>>>>>> +#
>>>>>>>>>> +# Inspired by btrfs/273 and generic/015
>>>>>>>>>> +#
>>>>>>>>>> +# This test stresses garbage collection in file systems
>>>>>>>>>> +# by first filling up a scratch mount to a specific usage point with
>>>>>>>>>> +# files of random size, then doing overwrites in parallel with
>>>>>>>>>> +# deletes to fragment the backing zones, forcing reclaim.
>>>>>>>>>> +
>>>>>>>>>> +. ./common/preamble
>>>>>>>>>> +_begin_fstest auto
>>>>>>>>>> +
>>>>>>>>>> +# real QA test starts here
>>>>>>>>>> +
>>>>>>>>>> +_require_scratch
>>>>>>>>>> +
>>>>>>>>>> +# This test requires specific data space usage, skip if we have compression
>>>>>>>>>> +# enabled.
>>>>>>>>>> +_require_no_compress
>>>>>>>>>> +
>>>>>>>>>> +M=$((1024 * 1024))
>>>>>>>>>> +min_fsz=$((1 * ${M}))
>>>>>>>>>> +max_fsz=$((256 * ${M}))
>>>>>>>>>> +bs=${M}
>>>>>>>>>> +fill_percent=95
>>>>>>>>>> +overwrite_percentage=20
>>>>>>>>>> +seq=0
>>>>>>>>>> +
>>>>>>>>>> +_create_file() {
>>>>>>>>>> +	local file_name=${SCRATCH_MNT}/data_$1
>>>>>>>>>> +	local file_sz=$2
>>>>>>>>>> +	local dd_extra=$3
>>>>>>>>>> +
>>>>>>>>>> +	POSIXLY_CORRECT=yes dd if=/dev/zero of=${file_name} \
>>>>>>>>>> +		bs=${bs} count=$(( $file_sz / ${bs} )) \
>>>>>>>>>> +		status=none $dd_extra  2>&1
>>>>>>>>>> +
>>>>>>>>>> +	status=$?
>>>>>>>>>> +	if [ $status -ne 0 ]; then
>>>>>>>>>> +		echo "Failed writing $file_name" >>$seqres.full
>>>>>>>>>> +		exit
>>>>>>>>>> +	fi
>>>>>>>>>> +}
>>>>>>>>
>>>>>>>> I wonder, is there a particular reason for doing all these file
>>>>>>>> operations with shell code instead of using fsstress to create and
>>>>>>>> delete files to fill the fs and stress all the zone-gc code?  This test
>>>>>>>> reminds me a lot of generic/476 but with more fork()ing.
>>>>>>>
>>>>>>> /me has the same confusion. Can this test cover more things than using
>>>>>>> fsstress (to do reclaim test) ? Or does it uncover some known bugs which
>>>>>>> other cases can't?
>>>>>>
>>>>>> ah, adding some more background is probably useful:
>>>>>>
>>>>>> I've been using this test to stress the crap out the zoned xfs garbage
>>>>>> collection / write throttling implementation for zoned rt subvolumes
>>>>>> support in xfs and it has found a number of issues during implementation
>>>>>> that i did not reproduce by other means.
>>>>>>
>>>>>> I think it also has wider applicability as it triggers bugs in btrfs.
>>>>>> f2fs passes without issues, but probably benefits from a quick smoke gc
>>>>>> test as well. Discussed this with Bart and Daeho (now in cc) before
>>>>>> submitting.
>>>>>>
>>>>>> Using fsstress would be cool, but as far as I can tell it cannot
>>>>>> be told to operate at a specific file system usage point, which
>>>>>> is a key thing for this test.
>>>>>
>>>>> As a random test case, if this case can be transformed to use fsstress to cover
>>>>> same issues, that would be nice.
>>>>>
>>>>> But if as a regression test case, it has its particular test coverage, and the
>>>>> issue it covered can't be reproduced by fsstress way, then let's work on this
>>>>> bash script one.
>>>>>
>>>>> Any thoughts?
>>>>
>>>> Yeah, I think bash is preferable for this particular test case.
>>>> Bash also makes it easy to hack for people's private uses.
>>>>
>>>> I use longer versions of this test (increasing overwrite_percentage)
>>>> for weekly testing.
>>>>
>>>> If we need fsstress for reproducing any future gc bug we can add
>>>> whats missing to it then.
>>>>
>>>> Does that make sense?
>>>>
>>>
>>> Hey Zorro,
>>>
>>> Any remaining concerns for adding this test? I could run it across
>>> more file systems(bcachefs could be interesting) and share the results
>>> if needed be.
>>
>> Hi,
>>
>> I remembered you metioned btrfs fails on this test, and I can reproduce it
>> on btrfs [1] with general disk. Have you figured out the reason? I don't
>> want to give btrfs a test failure suddently without a proper explanation :)
>> If it's a case issue, better to fix it for btrfs.
> 
> 
> I was surprised to see the failure for brtrfs on a conventional block
> device, but have not dug into it. I suspect/assume it's the same root
> cause as the issue Johannes is looking into when using a zoned block
> device as backing storage.
> 
> I debugged that a bit with Johannes, and noticed that if I manually
> kick btrfs rebalancing after each write via sysfs, the test progresses
> further (but super slow).
> 
> So *I think* that btrfs needs to:
> 
> * tune the triggering of gc to kick in way before available free space
>     runs out
> * start slowing down / blocking writes when reclaim pressure is high to
>     avoid premature -ENOSPC:es.

Yes both Boris and I are working on different solutions to the GC 
problem. But apart from that, I have the feeling that using stat to 
check on the available space is not the best idea.

> It's a pretty nasty problem, as potentially any write could -ENOSPC
> long before the reported available space runs out when a workload
> ends up fragmenting the disk and write pressure is high..
Qu Wenruo May 13, 2024, 7:33 a.m. UTC | #18
在 2024/5/13 02:26, Johannes Thumshirn 写道:
> [ +CC Boris ]
[...]
>> I was surprised to see the failure for brtrfs on a conventional block
>> device, but have not dug into it. I suspect/assume it's the same root
>> cause as the issue Johannes is looking into when using a zoned block
>> device as backing storage.
>>
>> I debugged that a bit with Johannes, and noticed that if I manually
>> kick btrfs rebalancing after each write via sysfs, the test progresses
>> further (but super slow).
>>
>> So *I think* that btrfs needs to:
>>
>> * tune the triggering of gc to kick in way before available free space
>>      runs out
>> * start slowing down / blocking writes when reclaim pressure is high to
>>      avoid premature -ENOSPC:es.
>
> Yes both Boris and I are working on different solutions to the GC
> problem. But apart from that, I have the feeling that using stat to
> check on the available space is not the best idea.

Although my previous workaround (fill to 100% then deleting 5%) is not
going to be feasible for zoned devices, what about two-run solution below?

- The first run to fill the whole fs until ENOSPC
   Then calculate how many bytes we have really written. (du?)

- Recreate the fs and fill to 95% of above number and start the test

But with this workaround, I'm not 100% sure if this is a good idea for all
filesystems.

AFAIK ext4/xfs sometimes can under-report the available space (aka,
reporting no available bytes, but can still write new data).

If we always go ENOSPC to calculate the real available space, it may
cause too much pressure.

And it may be a good idea for us btrfs guys to implement a similar
under-reporting available space behavior?

Thanks,
Qu
>
>> It's a pretty nasty problem, as potentially any write could -ENOSPC
>> long before the reported available space runs out when a workload
>> ends up fragmenting the disk and write pressure is high..
>
>
Hans Holmberg May 14, 2024, 8:02 a.m. UTC | #19
On 2024-05-13 09:33, Qu Wenruo wrote:
> 
> 
> 在 2024/5/13 02:26, Johannes Thumshirn 写道:
>> [ +CC Boris ]
> [...]
>>> I was surprised to see the failure for brtrfs on a conventional block
>>> device, but have not dug into it. I suspect/assume it's the same root
>>> cause as the issue Johannes is looking into when using a zoned block
>>> device as backing storage.
>>>
>>> I debugged that a bit with Johannes, and noticed that if I manually
>>> kick btrfs rebalancing after each write via sysfs, the test progresses
>>> further (but super slow).
>>>
>>> So *I think* that btrfs needs to:
>>>
>>> * tune the triggering of gc to kick in way before available free space
>>>       runs out
>>> * start slowing down / blocking writes when reclaim pressure is high to
>>>       avoid premature -ENOSPC:es.
>>
>> Yes both Boris and I are working on different solutions to the GC
>> problem. But apart from that, I have the feeling that using stat to
>> check on the available space is not the best idea.
> 
> Although my previous workaround (fill to 100% then deleting 5%) is not
> going to be feasible for zoned devices, what about two-run solution below?
> 
> - The first run to fill the whole fs until ENOSPC
>     Then calculate how many bytes we have really written. (du?)
> 
> - Recreate the fs and fill to 95% of above number and start the test
> 
> But with this workaround, I'm not 100% if this is a good idea for all
> filesystems.
> 
> AFAIK ext4/xfs sometimes can under-report the available space (aka,
> reporting no available bytes, but can still write new data).
> 
> If we always go ENOSPC to calculate the real available space, it may
> cause too much pressure.
> 
> And it may be a good idea for us btrfs guys to implement a similar
> under-reporting available space behavior?


My thoughts on this:

This test is not designed for testing how much data we can write to
a file system, so it would be fine to decrease fill_percent to allow
for a bit of fuzzyness. It would make the test longer to run though.

BUT that does not work around the btrfs issue(s). When testing around, I
tried decreasing fill_percent to something like 70 and btrfs still
-ENOSPC:ed. It's the fragmentation and the fact that reclaim does not
happen fast enough that causes writes to fail (I believe, johannes &
boris knows better).

Also, how are users supposed to know how much data they can store if 
stat does not tell them that with some degree of certainty?

Space accounting for full copy-on-write file systems is a Hard
Problem (tm), especially if metadata is also fully copy on write, but
that should not stop us from trying to do it right :)


Thanks,
Hans


> 
> Thanks,
> Qu
>>
>>> It's a pretty nasty problem, as potentially any write could -ENOSPC
>>> long before the reported available space runs out when a workload
>>> ends up fragmenting the disk and write pressure is high..
>>
>>
>
diff mbox series

Patch

diff --git a/tests/generic/744 b/tests/generic/744
new file mode 100755
index 000000000000..2c7ab76bf8b1
--- /dev/null
+++ b/tests/generic/744
@@ -0,0 +1,124 @@ 
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2024 Western Digital Corporation.  All Rights Reserved.
+#
+# FS QA Test No. 744
+#
+# Inspired by btrfs/273 and generic/015
+#
+# This test stresses garbage collection in file systems
+# by first filling up a scratch mount to a specific usage point with
+# files of random size, then doing overwrites in parallel with
+# deletes to fragment the backing zones, forcing reclaim.
+
+. ./common/preamble
+_begin_fstest auto
+
+# real QA test starts here
+
+_require_scratch
+
+# This test requires specific data space usage, skip if we have compression
+# enabled.
+_require_no_compress
+
+M=$((1024 * 1024))
+min_fsz=$((1 * ${M}))
+max_fsz=$((256 * ${M}))
+bs=${M}
+fill_percent=95
+overwrite_percentage=20
+seq=0
+
+_create_file() {
+	local file_name=${SCRATCH_MNT}/data_$1
+	local file_sz=$2
+	local dd_extra=$3
+
+	POSIXLY_CORRECT=yes dd if=/dev/zero of=${file_name} \
+		bs=${bs} count=$(( $file_sz / ${bs} )) \
+		status=none $dd_extra  2>&1
+
+	status=$?
+	if [ $status -ne 0 ]; then
+		echo "Failed writing $file_name" >>$seqres.full
+		exit
+	fi
+}
+
+_total_M() {
+	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
+	local bs=$(stat -f -c '%S' ${SCRATCH_MNT})
+	echo $(( ${total} * ${bs} / ${M}))
+}
+
+_used_percent() {
+	local available=$(stat -f -c '%a' ${SCRATCH_MNT})
+	local total=$(stat -f -c '%b' ${SCRATCH_MNT})
+	echo $((100 - (100 * ${available}) / ${total} ))
+}
+
+
+_delete_random_file() {
+	local to_delete=$(find ${SCRATCH_MNT} -type f | shuf | head -1)
+	rm $to_delete
+	sync ${SCRATCH_MNT}
+}
+
+_get_random_fsz() {
+	local r=$RANDOM
+	echo $(( ${min_fsz} + (${max_fsz} - ${min_fsz}) * (${r} % 100) / 100 ))
+}
+
+_direct_fillup () {
+	while [ $(_used_percent) -lt $fill_percent ]; do
+		local fsz=$(_get_random_fsz)
+
+		_create_file $seq $fsz "oflag=direct conv=fsync"
+		seq=$((${seq} + 1))
+	done
+}
+
+_mixed_write_delete() {
+	local dd_extra=$1
+	local total_M=$(_total_M)
+	local to_write_M=$(( ${overwrite_percentage} * ${total_M} / 100 ))
+	local written_M=0
+
+	while [ $written_M -lt $to_write_M ]; do
+		if [ $(_used_percent) -lt $fill_percent ]; then
+			local fsz=$(_get_random_fsz)
+
+			_create_file $seq $fsz "$dd_extra"
+			written_M=$((${written_M} + ${fsz}/${M}))
+			seq=$((${seq} + 1))
+		else
+			_delete_random_file
+		fi
+	done
+}
+
+seed=$RANDOM
+RANDOM=$seed
+echo "Running test with seed=$seed" >>$seqres.full
+
+_scratch_mkfs_sized $((8 * 1024 * 1024 * 1024)) >>$seqres.full
+_scratch_mount
+
+echo "Starting fillup using direct IO"
+_direct_fillup
+
+echo "Starting mixed write/delete test using direct IO"
+_mixed_write_delete "oflag=direct"
+
+echo "Starting mixed write/delete test using buffered IO"
+_mixed_write_delete ""
+
+echo "Syncing"
+sync ${SCRATCH_MNT}/*
+
+echo "Done, all good"
+
+# success, all done
+status=0
+exit
diff --git a/tests/generic/744.out b/tests/generic/744.out
new file mode 100644
index 000000000000..b40c2f43108e
--- /dev/null
+++ b/tests/generic/744.out
@@ -0,0 +1,6 @@ 
+QA output created by 744
+Starting fillup using direct IO
+Starting mixed write/delete test using direct IO
+Starting mixed write/delete test using buffered IO
+Syncing
+Done, all good