@@ -18,6 +18,7 @@ run_section=""
iam="check-parallel"
tmp=/tmp/check-parallel.$$
+test_list="$tmp.test_list"
. ./common/exit
. ./common/test_names
@@ -150,9 +151,6 @@ if [ -d "$basedir/runner-0/" ]; then
prev_results=`ls -tr $basedir/runner-0/ | grep results | tail -1`
fi
-_tl_prepare_test_list
-_tl_strip_test_list
-
# grab all previously run tests and order them from highest runtime to lowest
# We are going to try to run the longer tests first, hopefully so we can avoid
# massive thundering herds trying to run lots of really short tests in parallel
@@ -198,22 +196,22 @@ if ! $_tl_randomise -a ! $_tl_exact_order; then
fi
fi
-# split the list amongst N runners
-split_runner_list()
+# Grab the next test to be run from the tail of the test list file.
+# Prints the test name on stdout, or an empty string if there are no tests
+# remaining to run.
+# File operations are run under flock on fd 99 so concurrent gets are
+# serialised against each other; the caller must already have fd 99 open
+# (runner_go opens it on $tmp.test_list_lock).
+get_next_test()
{
- local ix
- local rx
- local -a _list=( $_tl_tests )
- for ((ix = 0; ix < ${#_list[*]}; ix++)); do
- seq="${_list[$ix]}"
- rx=$((ix % $runners))
- if ! _tl_expunge_test $seq; then
- runner_list[$rx]+="${_list[$ix]} "
- fi
- #echo $seq
- done
+	local test=
+
+	flock 99
+	test=$(tail -n 1 "$test_list")
+	# Remove exactly the line that was just read. Deleting by last-line
+	# address rather than by matching the test name avoids also removing
+	# other entries that contain the name as a substring, and avoids a sed
+	# "no previous regular expression" error once the list is empty.
+	sed -i '$d' "$test_list"
+	flock -u 99
+	echo "$test"
}
+
_create_loop_device()
{
local file=$1 dev
@@ -240,6 +238,8 @@ _destroy_loop_device()
runner_go()
{
+ exec 99<>$tmp.test_list_lock
+
local id=$1
local me=$basedir/runner-$id
local _test=$me/test.img
@@ -250,6 +250,7 @@ runner_go()
local _scratch_log=$me/scratch-log.img
local _logwrites=$me/logwrites.img
local _results=$me/results-$2
+ local test_to_run=$(get_next_test)
mkdir -p $me
@@ -291,7 +292,15 @@ runner_go()
# Similarly, we need to run check in it's own PID namespace so that
# operations like pkill only affect the runner instance, not globally
# kill processes from other check instances.
- tools/run_privatens ./check $run_section -x unreliable_in_parallel --exact-order ${runner_list[$id]} >> $me/log 2>&1
+ while [ -n "$test_to_run" ]; do
+ echo "Runner $id: running test $test_to_run"
+ unset FSTESTS_ISOL
+ if ! _tl_expunge_test $test_to_run; then
+ tools/run_privatens ./check $run_section $test_to_run >> $me/log 2>&1
+ fi
+
+ test_to_run=$(get_next_test)
+ done
wait
sleep 1
@@ -320,20 +329,32 @@ cleanup()
umount -R $basedir/*/test 2> /dev/null
umount -R $basedir/*/scratch 2> /dev/null
losetup --detach-all
+ rm -rf $tmp.*
}
trap "cleanup; exit" HUP INT QUIT TERM
_config_setup_parallel
-split_runner_list
+_tl_setup_exclude_group "unreliable_in_parallel"
+_tl_prepare_test_list
+_tl_strip_test_list
+
+if ! $_tl_randomise -a ! $_tl_exact_order; then
+ if [ -f $basedir/runner-0/$prev_results/check.time ]; then
+ time_order_test_list
+ fi
+fi
+
+# Write the test list to $test_list, one test per line, in reverse order so
+# that get_next_test() can pull from the file tail rather than the head.
+printf '%s\n' $_tl_tests | tac > "$test_list"
if [ -n "$show_test_list" ]; then
echo Time ordered test list:
- echo $_tl_tests
- echo
+	# The file is stored reversed; undo that so the listing is in run order.
+	tac "$test_list"
+	# cleanup() is only invoked from the signal trap, so remove the list
+	# file here rather than leaving it behind in /tmp.
+	rm -f "$test_list"
+	exit 0
fi
-
# Each parallel test runner needs to only see it's own mount points. If we
# leave the basedir as shared, then all tests see all mounts and then we get
# mount propagation issues cropping up. For example, cloning a new mount
@@ -349,20 +370,10 @@ mount --make-private $basedir
now=`date +%Y-%m-%d-%H:%M:%S`
for ((i = 0; i < $runners; i++)); do
-
- if [ -n "$show_test_list" ]; then
- echo "Runner $i: ${runner_list[$i]}"
- else
- runner_go $i $now &
- fi
-
+ runner_go $i $now &
done;
wait
-if [ -n "$show_test_list" ]; then
- exit 0
-fi
-
echo -n "Tests run: "
grep Ran $basedir/*/log | sed -e 's,^.*:,,' -e 's, ,\n,g' | sort | uniq | wc -l