[20/28,RFC] check-parallel: run tests directly without using check

Message ID	20250417031208.1852171-21-david@fromorbit.com (mailing list archive)
State	New
Headers	show Received: from mail-pf1-f177.google.com (mail-pf1-f177.google.com [209.85.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 61D3D1CAA4 for <fstests@vger.kernel.org>; Thu, 17 Apr 2025 03:29:37 +0000 (UTC) From: Dave Chinner <david@fromorbit.com> To: fstests@vger.kernel.org Cc: zlang@kernel.org Subject: [PATCH 20/28] [RFC] check-parallel: run tests directly without using check Date: Thu, 17 Apr 2025 13:01:01 +1000 Message-ID: <20250417031208.1852171-21-david@fromorbit.com> In-Reply-To: <20250417031208.1852171-1-david@fromorbit.com> References: <20250417031208.1852171-1-david@fromorbit.com> Precedence: bulk MIME-Version: 1.0 Content-Transfer-Encoding: 8bit
Series	check-parallel: Running tests without check \| expand [00/28] check-parallel: Running tests without check [01/28] fstests: remove support for non-numeric test names [02/28] _scratch_mkfs_sized: obey USE_EXTERNAL for XFS filesystems [03/28] fstests: move test exit functions to common/exit [04/28] check-parallel: report how many tests were _notrun [05/28] check: factor out test list building code [06/28] check-parallel: use common group list parsing code [07/28] check-parallel: adjust concurrency according to CPU count [08/28] check-parallel: add logwrite device support [09/28] check-parallel: allow FSTYP selection from the CLI [10/28] check-parallel: use PID namespaces for runner process isolation [11/28] check-parallel: initial support for specifying device sizes [12/28] config: move config section code to it's own file [13/28] check-parallel: introduce config file support [14/28] fstests: further separate sourcing common/rc and common/config from initialisation [15/28] check-parallel: de-batch test execution [16/28] check-parallel: run sections directly [17/28] check-parallel: rebuild test list when FSTYP changes [18/28] check-parallel: create a "results-latest" symlink [19/28] check: factor test running [20/28,RFC] check-parallel: run tests directly without using check [21/28] generic/531: limit max files per CPU [22/28] fsync-tester.c: use syncfs() rather than sync() [23/28] open-by-handle.c: use syncfs() rather than sync() [24/28] open-by-handle.c: use syncfs() rather than sync() [25/28] bulkstat_unlink_test_modified.c: remove unused test code [26/28] stale-handle.c: use syncfs() rather than sync() [27/28] scaleread: remove dead test code [28/28] xfs/259: no need to call sync

diff --git a/check b/check index 106de0ee6..f032f9c48 100755 --- a/check +++ b/check @@ -25,15 +25,6 @@ _err_msg="" # start the initialisation work now iam=check.$$ -# mkfs.xfs uses the presence of both of these variables to enable formerly -# supported tiny filesystem configurations that fstests use for fuzz testing -# in a controlled environment -export MSGVERB="text:action" -export QA_CHECK_FS=${QA_CHECK_FS:=true} - -# number of diff lines from a failed test, 0 for whole output -export DIFF_LENGTH=${DIFF_LENGTH:=10} - rm -f $tmp.list $tmp.tmp $tmp.grep $here/$iam.out $tmp.report.* $tmp.arglist . ./common/exit @@ -188,6 +179,10 @@ fi # by default don't output timestamps _te_emit_timestamps=${TIMESTAMP:=} +# number of diff lines from a failed test, 0 for whole output +_te_diff_length=${DIFF_LENGTH:=10} + + # If the test config specified a soak test duration, see if there are any # unit suffixes that need converting to an integer seconds count. if [ -n "$SOAK_DURATION" ]; then @@ -245,21 +240,7 @@ _wrapup() fi needwrap=false elif $needwrap; then - if [ -f $check.time -a -f $tmp.time ]; then - cat $check.time $tmp.time \ - | $AWK_PROG ' - { t[$1] = $2 } - END { - if (NR > 0) { - for (i in t) print i " " t[i] - } - }' \ - | sort -n >$tmp.out - mv $tmp.out $check.time - if $OPTIONS_HAVE_SECTIONS; then - cp $check.time ${REPORT_DIR}/check.time - fi - fi + _te_time_report set +x _global_log "" diff --git a/check-parallel b/check-parallel index 1b67709a2..be3dfd346 100755 --- a/check-parallel +++ b/check-parallel @@ -18,13 +18,13 @@ run_section="" exclude_section="" iam="check-parallel" -tmp=/tmp/check-parallel.$$ -test_list="$tmp.test_list" +export here=`pwd` . ./common/exit . ./common/test_names . ./common/test_list . ./common/config-sections +. ./common/config usage() { @@ -155,6 +155,18 @@ elif [ -d "$basedir/runner-0/" ]; then prev_results=`ls -tr $basedir/runner-0/ | grep results | tail -1` fi +# We keep the test list in the base directory because it needs to be common +# across all test runners and their private namespaces. Because runners execute +# in private /tmp/ instances, we can't keep it there, so use the shared +# $basedir that hosts all the runners as the location of the test list. +test_list="$basedir/test_list" + +# Similarly, set our tmp dir to be under the shared basedir so that all the +# temp files that stuff like test execution use for expunge lists work +# appropriately. +export tmp="$basedir/tmp/check-parallel.$$" +mkdir -p $basedir/tmp + # grab all previously run tests and order them from highest runtime to lowest # We are going to try to run the longer tests first, hopefully so we can avoid # massive thundering herds trying to run lots of really short tests in parallel @@ -219,22 +231,6 @@ setup_test_list() echo $_tl_tests |sed -e 's/ /\n/g' | tac > $test_list } -# Grab the next test to be run from the tail of the file. -# Returns an empty string if there is no tests remaining to run. -# File operations are run under flock so concurrent gets are serialised against -# each other. -get_next_test() -{ - local test_file="$test_list.$1" - local test= - - flock 99 - test=$(tail -1 $test_file) - sed -i "\,$test,d" $test_file - flock -u 99 - echo $test -} - _create_loop_device() { @@ -260,32 +256,6 @@ _destroy_loop_device() losetup -d $dev || _fail "Cannot destroy loop device $dev" } -run_tests() -{ - local section="$1" - local logfile="$2" - - exec 99<>$tmp.test_list_lock - - local test_to_run=$(get_next_test $section) - - # Run the tests in it's own mount namespace, as per the comment below - # that precedes making the basedir a private mount. - # - # Similarly, we need to run check in it's own PID namespace so that - # operations like pkill only affect the runner instance, not globally - # kill processes from other check instances. - while [ -n "$test_to_run" ]; do - echo -n " $test_to_run " - unset FSTESTS_ISOL - if ! _tl_expunge_test $test_to_run; then - tools/run_privatens ./check -s $section $test_to_run >> $logfile 2>&1 - fi - - test_to_run=$(get_next_test $section) - done -} - runner_go() { @@ -301,6 +271,8 @@ runner_go() local _results=$me/results-$2 local section=$3 + export tmp=/tmp/check-parallel-runner-$id + mkdir -p $me xfs_io -f -c "truncate $TEST_DEV_SIZE" $_test @@ -323,6 +295,7 @@ runner_go() export LOGWRITES_DEV=$(_create_loop_device $_logwrites) export RESULT_BASE=$_results + export REPORT_DIR="$RESULT_BASE/$section" mkdir -p $TEST_DIR mkdir -p $SCRATCH_MNT @@ -338,7 +311,7 @@ runner_go() # export DUMP_CORRUPT_FS=1 - run_tests $section $_results/log + tools/run_test.sh $basedir $test_list.$section >> $_results/log 2>&1 wait sleep 1 @@ -351,6 +324,7 @@ runner_go() _destroy_loop_device $SCRATCH_RTDEV _destroy_loop_device $SCRATCH_LOGDEV _destroy_loop_device $LOGWRITES_DEV + rm -f $tmp.* grep -q Failures: $_results/log if [ $? -eq 0 ]; then @@ -366,6 +340,7 @@ run_section() local now="$2" local results="$basedir/*/results-$now" local i + local threads=$runners echo $run_section |grep -qw $section || return echo $exclude_section |grep -qw $section && return @@ -391,7 +366,15 @@ run_section() fi cp $test_list $test_list.$section - for ((i = 0; i < $runners; i++)); do + # only run as many runners are there are tests to run + i=$(cat $test_list.$section | wc -l) + if [ "$i" -lt "$runners" ]; then + threads=$i + fi + + echo Test list $test_list.$section contains $i tests + echo Running $threads tests concurrently + for ((i = 0; i < $threads; i++)); do runner_go $i $now $section & done wait diff --git a/common/config b/common/config index b93a6c0d3..149ef99c7 100644 --- a/common/config +++ b/common/config @@ -70,6 +70,12 @@ export LOAD_FACTOR=${LOAD_FACTOR:=1} export SOAK_DURATION=${SOAK_DURATION:=} export DEBUGFS_MNT=${DEBUGFS_MNT:="/sys/kernel/debug"} +# mkfs.xfs uses the presence of both of these variables to enable formerly +# supported tiny filesystem configurations that fstests use for fuzz testing +# in a controlled environment +export MSGVERB="text:action" +export QA_CHECK_FS=${QA_CHECK_FS:=true} + # some constants for overlayfs setup export OVL_UPPER="ovl-upper" export OVL_LOWER="ovl-lower" diff --git a/common/rc b/common/rc index be6cd92c4..5b7d2faf8 100644 --- a/common/rc +++ b/common/rc @@ -2936,6 +2936,8 @@ _require_xfs_io_command() _notrun "O_TMPFILE is not supported" ;; "fsmap") + df -h >> $seqres.full 2>&1 + df -i >> $seqres.full 2>&1 testio=`$XFS_IO_PROG -f -c "fsmap" $testfile 2>&1` echo $testio | grep -q "Inappropriate ioctl" && \ _notrun "xfs_io $command support is missing" diff --git a/common/test_exec b/common/test_exec index 63efa3d19..e9c80bd59 100644 --- a/common/test_exec +++ b/common/test_exec @@ -21,6 +21,12 @@ _te_notrun=() _te_loop_status=() _te_emit_timestamps="" _te_dry_run="" +_te_diff_length=10 + +# This is needed because callers don't always source common/report and +# if $do_report is undefined or true we'll try to call _make_testcase_report() +# without it being defined. +do_report=false _te_wipe_counters() { @@ -43,6 +49,25 @@ _te_timestamp() fi } +_te_time_report() +{ + if [ -f $check.time -a -f $tmp.time ]; then + cat $check.time $tmp.time \ + | $AWK_PROG ' + { t[$1] = $2 } + END { + if (NR > 0) { + for (i in t) print i " " t[i] + } + }' \ + | sort -n >$tmp.out + mv $tmp.out $check.time + if $OPTIONS_HAVE_SECTIONS; then + cp $check.time ${REPORT_DIR}/check.time + fi + fi +} + _te_check_filesystems() { local ret=0 @@ -330,10 +355,10 @@ _te_run_test() _dump_err "- output mismatch (see $seqres.out.bad)" mv $tmp.out $seqres.out.bad $diff $seq.out $seqres.out.bad | { - if test "$DIFF_LENGTH" -le 0; then + if test "$_te_diff_length" -le 0; then cat else - head -n "$DIFF_LENGTH" + head -n "$_te_diff_length" echo "..." echo "(Run '$diff $here/$seq.out $seqres.out.bad'" \ " to see the entire diff)" diff --git a/common/test_list b/common/test_list index 092b3ed17..4beb08b22 100644 --- a/common/test_list +++ b/common/test_list @@ -160,6 +160,11 @@ _tl_prepare_test_list() trim_test_list $list done + # Remove expunged tests + for f in "${_tl_exclude_tests[@]}"; do + trim_test_list tests/$f + done + # sort the list of tests into numeric order unless we're running tests # in the exact order specified if ! $_tl_exact_order; then @@ -194,6 +199,7 @@ _tl_expunge_test() return 1 } + _tl_setup_exclude_tests() { local list="$1" diff --git a/tests/xfs/271 b/tests/xfs/271 index 8a71746d6..ae4599282 100755 --- a/tests/xfs/271 +++ b/tests/xfs/271 @@ -22,8 +22,6 @@ _cleanup() _require_xfs_scratch_rmapbt _require_xfs_io_command "fsmap" -rm -f "$seqres.full" - echo "Format and mount" _scratch_mkfs > "$seqres.full" 2>&1 _scratch_mount diff --git a/tools/run_test.sh b/tools/run_test.sh new file mode 100755 index 000000000..75f7a7dcd --- /dev/null +++ b/tools/run_test.sh @@ -0,0 +1,116 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2025 Red Hat, Inc. All Rights Reserved. +# +# Test run helper for check-parallel +# +# check-parallel sets up all the devices and environments needs to run tests, +# but we can't set up for a test until we are executing in a private pid/mount +# namespace. +# +# This means we cannot simply run the test itself in a private name space; they +# require things like the test device to already be mounted, and we require a +# private mount namespace before we start mounting devices. +# +# Hence we need a helper that first creates a private namespace, then +# does all the setup work for tests to run, then iterates tests until there +# are no more tests to run. +# +# The tests to run are held in a shared file in $basedir so that we set up +# a private /tmp for the namespace and no lose access to the test list. + +# re-execute in a private namespace as a first step +if [ -z "${FSTESTS_ISOL}" ]; then + if [ -z "$1" ] || [ "$1" = "--help" ]; then + echo "Usage: $0 basedir test_list " + exit 1 + fi + + if [ ! -d "$1" ]; then + echo "invalid basedir ($1) specified" + exit 1 + fi + + FSTESTS_ISOL=privatens exec "$(dirname "$0")/../src/nsexec" -z -m -p "$0" "$@" + exit $? +fi + +# Everything past this point runs in a private namespace. +# +# We set up private mounts for /proc and /tmp so they aren't visible outside +# this mount namespace and it's children. +for path in /proc /tmp; do + mountpoint "$path" >/dev/null && \ + mount --make-private "$path" +done +mount -t proc proc /proc +mount -t tmpfs tmpfs /tmp + +echo $PWD $FSTYP + +. ./common/exit +. ./common/test_names +. ./common/test_list +. ./common/test_exec +. ./common/rc + +basedir="$1" +test_file="$2" +_te_diff_length=${DIFF_LENGTH:=10} +export tmp=/tmp/run-helper.$$ + +# XXX: should be a _te_diff variable +diff='diff -u' + +# Grab the next test to be run from the tail of the file. +# Returns an empty string if there is no tests remaining to run. +# File operations are run under flock so concurrent gets are serialised against +# each other. +get_next_test() +{ + local test= + + flock 99 + test=$(tail -1 $test_file) + sed -i "\,$test,d" $test_file + flock -u 99 + echo $test +} + +_run_seq() +{ + ./$seq + return $? +} + +exec 99<>$test_file.lock + +# XXX - refactor this back to a test_exec variable +check="$RESULT_BASE/check" +touch $check.time + +init_rc + +test_to_run=$(get_next_test) +while [ -n "$test_to_run" ]; do + _te_run_test "tests/$test_to_run" + + test_to_run=$(get_next_test) +done + +_te_time_report + +if ((${#_te_try[*]} > 0)); then + echo "Ran: ${_te_try[*]}" +fi + +if ((${#_te_notrun[*]} > 0)); then + echo "Not run: ${_te_notrun[*]}" +fi + +if ((${#_te_bad[*]} > 0)); then + echo "Failures: ${_te_bad[*]}" + echo "Failed ${#_te_bad[*]} of ${#_te_try[*]} tests" +else + echo "Passed all ${#_te_try[*]} tests" +fi

[20/28,RFC] check-parallel: run tests directly without using check

Commit Message

Patch