diff mbox series

btrfs/14{2,3}: use dm-dust instead of fail_make_request

Message ID d992390752c612acd0893ee3db929e77affded2b.1586983958.git.osandov@fb.com (mailing list archive)
State New, archived
Headers show
Series btrfs/14{2,3}: use dm-dust instead of fail_make_request | expand

Commit Message

Omar Sandoval April 15, 2020, 8:54 p.m. UTC
From: Omar Sandoval <osandov@fb.com>

These two tests test direct I/O and buffered read repair, respectively,
with fail_make_request. However, by using "fail_make_request/times",
they rely on repair having a specific I/O pattern. My pending Btrfs
direct I/O refactoring patch series changes this I/O pattern and thus
breaks this test.

The dm-dust target (added in v5.2) emulates a device with bad blocks
that are fixed when written to (like a device that remaps bad blocks).
This is exactly what we want for testing repair. Add some common dm-dust
helpers and update the tests to use dm-dust.

Signed-off-by: Omar Sandoval <osandov@fb.com>
---
 common/dmdust   | 35 +++++++++++++++++++
 tests/btrfs/142 | 88 +++++++++++------------------------------------
 tests/btrfs/143 | 90 +++++++++++--------------------------------------
 3 files changed, 73 insertions(+), 140 deletions(-)
 create mode 100644 common/dmdust

Comments

Nikolay Borisov May 12, 2020, 6:15 a.m. UTC | #1
On 15.04.20 г. 23:54 ч., Omar Sandoval wrote:
> From: Omar Sandoval <osandov@fb.com>
> 
> These two tests test direct I/O and buffered read repair, respectively,
> with fail_make_request. However, by using "fail_make_request/times",
> they rely on repair having a specific I/O pattern. My pending Btrfs
> direct I/O refactoring patch series changes this I/O pattern and thus
> breaks this test.
> 
> The dm-dust target (added in v5.2) emulates a device with bad blocks
> that are fixed when written to (like a device that remaps bad blocks).
> This is exactly what we want for testing repair. Add some common dm-dust
> helpers and update the tests to use dm-dust.
> 
> Signed-off-by: Omar Sandoval <osandov@fb.com>

Eryu, are you going to merge this patch ?
Eryu Guan May 12, 2020, 4:22 p.m. UTC | #2
On Tue, May 12, 2020 at 09:15:50AM +0300, Nikolay Borisov wrote:
> 
> 
> On 15.04.20 г. 23:54 ч., Omar Sandoval wrote:
> > From: Omar Sandoval <osandov@fb.com>
> > 
> > These two tests test direct I/O and buffered read repair, respectively,
> > with fail_make_request. However, by using "fail_make_request/times",
> > they rely on repair having a specific I/O pattern. My pending Btrfs
> > direct I/O refactoring patch series changes this I/O pattern and thus
> > breaks this test.
> > 
> > The dm-dust target (added in v5.2) emulates a device with bad blocks
> > that are fixed when written to (like a device that remaps bad blocks).
> > This is exactly what we want for testing repair. Add some common dm-dust
> > helpers and update the tests to use dm-dust.
> > 
> > Signed-off-by: Omar Sandoval <osandov@fb.com>
> 
> Eryu, are you going to merge this patch ?

The dmdust part seems fine to me, could you or other btrfs folks help
review the btrfs change? That'd be appreciated!

Thanks,
Eryu
Nikolay Borisov May 15, 2020, 6:41 a.m. UTC | #3
On 15.04.20 г. 23:54 ч., Omar Sandoval wrote:
> From: Omar Sandoval <osandov@fb.com>
> 
> These two tests test direct I/O and buffered read repair, respectively,
> with fail_make_request. However, by using "fail_make_request/times",
> they rely on repair having a specific I/O pattern. My pending Btrfs
> direct I/O refactoring patch series changes this I/O pattern and thus
> breaks this test.
> 
> The dm-dust target (added in v5.2) emulates a device with bad blocks
> that are fixed when written to (like a device that remaps bad blocks).
> This is exactly what we want for testing repair. Add some common dm-dust
> helpers and update the tests to use dm-dust.
> 
> Signed-off-by: Omar Sandoval <osandov@fb.com>

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
diff mbox series

Patch

diff --git a/common/dmdust b/common/dmdust
new file mode 100644
index 00000000..56fcc0e0
--- /dev/null
+++ b/common/dmdust
@@ -0,0 +1,35 @@ 
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2020 Facebook.  All Rights Reserved.
+#
+# common functions for setting up and tearing down a dmdust device
+
+_init_dust()
+{
+	local DEV_SIZE=`blockdev --getsz $SCRATCH_DEV`
+	DUST_DEV=/dev/mapper/dust-test
+	DUST_TABLE="0 $DEV_SIZE dust $SCRATCH_DEV 0 512"
+	_dmsetup_create dust-test --table "$DUST_TABLE" || \
+		_fatal "failed to create dust device"
+}
+
+_mount_dust()
+{
+	_scratch_options mount
+	_mount -t $FSTYP `_common_dev_mount_options $*` $SCRATCH_OPTIONS \
+		$DUST_DEV $SCRATCH_MNT
+}
+
+_unmount_dust()
+{
+	$UMOUNT_PROG $SCRATCH_MNT
+}
+
+_cleanup_dust()
+{
+	# If dmsetup load fails then we need to make sure to do resume here
+	# otherwise the umount will hang
+	$DMSETUP_PROG resume dust-test > /dev/null 2>&1
+	$UMOUNT_PROG $SCRATCH_MNT > /dev/null 2>&1
+	_dmsetup_remove dust-test
+}
diff --git a/tests/btrfs/142 b/tests/btrfs/142
index db0a3377..e495c2c6 100755
--- a/tests/btrfs/142
+++ b/tests/btrfs/142
@@ -30,6 +30,7 @@  _cleanup()
 # get standard environment, filters and checks
 . ./common/rc
 . ./common/filter
+. ./common/dmdust
 
 # remove previous $seqres.full before test
 rm -f $seqres.full
@@ -39,55 +40,12 @@  rm -f $seqres.full
 # Modify as appropriate.
 _supported_fs btrfs
 _supported_os Linux
-_require_fail_make_request
 _require_scratch_dev_pool 2
+_require_dm_target dust
 
 _require_btrfs_command inspect-internal dump-tree
 _require_command "$FILEFRAG_PROG" filefrag
 
-get_physical()
-{
-	local logical=$1
-	local stripe=$2
-	$BTRFS_UTIL_PROG inspect-internal dump-tree -t 3 $SCRATCH_DEV | \
-		grep $logical -A 6 | \
-		$AWK_PROG "(\$1 ~ /stripe/ && \$3 ~ /devid/ && \$2 ~ /$stripe/) { print \$6 }"
-}
-
-get_devid()
-{
-	local logical=$1
-	local stripe=$2
-	$BTRFS_UTIL_PROG inspect-internal dump-tree -t 3 $SCRATCH_DEV | \
-		grep $logical -A 6 | \
-		$AWK_PROG "(\$1 ~ /stripe/ && \$3 ~ /devid/ && \$2 ~ /$stripe/) { print \$4 }"
-}
-
-get_device_path()
-{
-	local devid=$1
-	echo "$SCRATCH_DEV_POOL" | $AWK_PROG "{print \$$devid}"
-}
-
-start_fail()
-{
-	local sysfs_bdev="$1"
-	echo 100 > $DEBUGFS_MNT/fail_make_request/probability
-	echo 2 > $DEBUGFS_MNT/fail_make_request/times
-	echo 1 > $DEBUGFS_MNT/fail_make_request/task-filter
-	echo 0 > $DEBUGFS_MNT/fail_make_request/verbose
-	echo 1 > $sysfs_bdev/make-it-fail
-}
-
-stop_fail()
-{
-	local sysfs_bdev="$1"
-	echo 0 > $DEBUGFS_MNT/fail_make_request/probability
-	echo 0 > $DEBUGFS_MNT/fail_make_request/times
-	echo 0 > $DEBUGFS_MNT/fail_make_request/task-filter
-	echo 0 > $sysfs_bdev/make-it-fail
-}
-
 _scratch_dev_pool_get 2
 # step 1, create a raid1 btrfs which contains one 128k file.
 echo "step 1......mkfs.btrfs" >>$seqres.full
@@ -107,42 +65,34 @@  echo "step 2......corrupt file extent" >>$seqres.full
 
 ${FILEFRAG_PROG} -v $SCRATCH_MNT/foobar >> $seqres.full
 logical_in_btrfs=`${FILEFRAG_PROG} -v $SCRATCH_MNT/foobar | _filter_filefrag | cut -d '#' -f 1`
-physical=`get_physical ${logical_in_btrfs} 1`
-devid=$(get_devid ${logical_in_btrfs} 1)
-target_dev=$(get_device_path $devid)
+echo "Logical offset is $logical_in_btrfs" >>$seqres.full
+_scratch_unmount
 
-SYSFS_BDEV=`_sysfs_dev $target_dev`
+read -r stripe physical < <(
+$BTRFS_UTIL_PROG inspect-internal dump-tree -t 3 "$SCRATCH_DEV" |
+	grep "$logical_in_btrfs" -A 6 |
+	$AWK_PROG '$1 == "stripe" && $3 == "devid" && $4 == 1 { print $2 " " $6; exit }')
+echo "Physical offset of stripe $stripe is $physical on devid 1" >> $seqres.full
 
-_scratch_unmount
-$BTRFS_UTIL_PROG ins dump-tree -t 3 $SCRATCH_DEV | \
-	grep $logical_in_btrfs -A 6 >> $seqres.full
-echo "Corrupt stripe 1 devid $devid devpath $target_dev physical $physical" \
-	>> $seqres.full
-$XFS_IO_PROG -d -c "pwrite -S 0xbb -b 64K $physical 64K" $target_dev > /dev/null
+$XFS_IO_PROG -d -c "pwrite -S 0xbb -b 64K $physical 64K" "$SCRATCH_DEV" > /dev/null
 
-_scratch_mount -o nospace_cache
+_init_dust
+_mount_dust
 
 # step 3, 128k dio read (this read can repair bad copy)
 echo "step 3......repair the bad copy" >>$seqres.full
 
-# since raid1 consists of two copies, and the bad copy was put on stripe #1
-# while the good copy lies on stripe #0, the bad copy only gets access when the
-start_fail $SYSFS_BDEV
-while [[ -z ${result} ]]; do
-	# enable task-filter only fails the following dio read so the repair is
-	# supposed to work.
-	result=$(bash -c "
-	if [[ \$((\$\$ % 2)) -eq 1 ]]; then
-		echo 1 > /proc/\$\$/make-it-fail
-		exec $XFS_IO_PROG -d -c \"pread -b 128K 0 128K\" \"$SCRATCH_MNT/foobar\"
-	fi");
+$DMSETUP_PROG message dust-test 0 addbadblock $((physical / 512))
+$DMSETUP_PROG message dust-test 0 enable
+while [[ -z $( (( BASHPID % 2 == stripe )) &&
+	       exec $XFS_IO_PROG -d -c "pread -b 128K 0 128K" "$SCRATCH_MNT/foobar") ]]; do
+	:
 done
-stop_fail $SYSFS_BDEV
 
-_scratch_unmount
+_cleanup_dust
 
 # check if the repair works
-$XFS_IO_PROG -c "pread -v -b 512 $physical 512" $target_dev | \
+$XFS_IO_PROG -c "pread -v -b 512 $physical 512" "$SCRATCH_DEV" |
 	_filter_xfs_io_offset
 
 _scratch_dev_pool_put
diff --git a/tests/btrfs/143 b/tests/btrfs/143
index 0388a528..07040eb3 100755
--- a/tests/btrfs/143
+++ b/tests/btrfs/143
@@ -37,6 +37,7 @@  _cleanup()
 # get standard environment, filters and checks
 . ./common/rc
 . ./common/filter
+. ./common/dmdust
 
 # remove previous $seqres.full before test
 rm -f $seqres.full
@@ -46,58 +47,12 @@  rm -f $seqres.full
 # Modify as appropriate.
 _supported_fs btrfs
 _supported_os Linux
-_require_fail_make_request
 _require_scratch_dev_pool 2
+_require_dm_target dust
 
 _require_btrfs_command inspect-internal dump-tree
 _require_command "$FILEFRAG_PROG" filefrag
 
-get_physical()
-{
-	local logical=$1
-	local stripe=$2
-	$BTRFS_UTIL_PROG inspect-internal dump-tree -t 3 $SCRATCH_DEV | \
-		grep $logical -A 6 | \
-		$AWK_PROG "(\$1 ~ /stripe/ && \$3 ~ /devid/ && \$2 ~ /$stripe/) { print \$6 }"
-}
-
-get_devid()
-{
-	local logical=$1
-	local stripe=$2
-	$BTRFS_UTIL_PROG inspect-internal dump-tree -t 3 $SCRATCH_DEV | \
-		grep $logical -A 6 | \
-		$AWK_PROG "(\$1 ~ /stripe/ && \$3 ~ /devid/ && \$2 ~ /$stripe/) { print \$4 }"
-}
-
-get_device_path()
-{
-	local devid=$1
-	echo "$SCRATCH_DEV_POOL" | $AWK_PROG "{print \$$devid}"
-}
-
-SYSFS_BDEV=`_sysfs_dev $SCRATCH_DEV`
-
-start_fail()
-{
-	local sysfs_bdev="$1"
-	echo 100 > $DEBUGFS_MNT/fail_make_request/probability
-	# the 1st one fails the first bio which is reading 4k (or more due to
-	# readahead), and the 2nd one fails the retry of validation so that it
-	# triggers read-repair
-	echo 2 > $DEBUGFS_MNT/fail_make_request/times
-	echo 0 > $DEBUGFS_MNT/fail_make_request/verbose
-	echo 1 > $sysfs_bdev/make-it-fail
-}
-
-stop_fail()
-{
-	local sysfs_bdev="$1"
-	echo 0 > $DEBUGFS_MNT/fail_make_request/probability
-	echo 0 > $DEBUGFS_MNT/fail_make_request/times
-	echo 0 > $sysfs_bdev/make-it-fail
-}
-
 _scratch_dev_pool_get 2
 # step 1, create a raid1 btrfs which contains one 128k file.
 echo "step 1......mkfs.btrfs" >>$seqres.full
@@ -117,41 +72,34 @@  echo "step 2......corrupt file extent" >>$seqres.full
 
 ${FILEFRAG_PROG} -v $SCRATCH_MNT/foobar >> $seqres.full
 logical_in_btrfs=`${FILEFRAG_PROG} -v $SCRATCH_MNT/foobar | _filter_filefrag | cut -d '#' -f 1`
-physical=`get_physical ${logical_in_btrfs} 1`
-devid=$(get_devid ${logical_in_btrfs} 1)
-target_dev=$(get_device_path $devid)
-
-SYSFS_BDEV=`_sysfs_dev $target_dev`
+echo "Logical offset is $logical_in_btrfs" >>$seqres.full
 _scratch_unmount
 
-echo "corrupt stripe 1 devid $devid devpath $target_dev physical $physical" \
-	>> $seqres.full
-$XFS_IO_PROG -d -c "pwrite -S 0xbb -b 64K $physical 64K" $target_dev > /dev/null
+read -r stripe physical < <(
+$BTRFS_UTIL_PROG inspect-internal dump-tree -t 3 "$SCRATCH_DEV" |
+	grep "$logical_in_btrfs" -A 6 |
+	$AWK_PROG '$1 == "stripe" && $3 == "devid" && $4 == 1 { print $2 " " $6; exit }')
+echo "Physical offset of stripe $stripe is $physical on devid 1" >> $seqres.full
 
-_scratch_mount -o nospace_cache
+$XFS_IO_PROG -d -c "pwrite -S 0xbb -b 64K $physical 64K" "$SCRATCH_DEV" > /dev/null
+
+_init_dust
+_mount_dust
 
 # step 3, 128k buffered read (this read can repair bad copy)
 echo "step 3......repair the bad copy" >>$seqres.full
 
-# since raid1 consists of two copies, and the bad copy was put on stripe #1
-# while the good copy lies on stripe #0, the bad copy only gets access when the
-# reader's pid % 2 == 1 is true
-while [[ -z ${result} ]]; do
-    # invalidate the page cache.
-    _scratch_cycle_mount
-
-    start_fail $SYSFS_BDEV
-    result=$(bash -c "
-        if [[ \$((\$\$ % 2)) -eq 1 ]]; then
-                exec $XFS_IO_PROG -c \"pread 0 4K\" \"$SCRATCH_MNT/foobar\"
-        fi");
-    stop_fail $SYSFS_BDEV
+$DMSETUP_PROG message dust-test 0 addbadblock $((physical / 512))
+$DMSETUP_PROG message dust-test 0 enable
+while [[ -z $( (( BASHPID % 2 == stripe )) &&
+	       exec $XFS_IO_PROG -c "pread -b 128K 0 128K" "$SCRATCH_MNT/foobar") ]]; do
+	:
 done
 
-_scratch_unmount
+_cleanup_dust
 
 # check if the repair works
-$XFS_IO_PROG -c "pread -v -b 512 $physical 512" $target_dev |\
+$XFS_IO_PROG -c "pread -v -b 512 $physical 512" "$SCRATCH_DEV" |
 	_filter_xfs_io_offset
 
 _scratch_dev_pool_put