diff mbox series

[RFC,v2] fstests: add stress truncation + writeback + compaction split test

Message ID 20240429050022.350818-1-mcgrof@kernel.org (mailing list archive)
State Accepted, archived
Headers show
Series [RFC,v2] fstests: add stress truncation + writeback + compaction split test | expand

Commit Message

Luis Chamberlain April 29, 2024, 5 a.m. UTC
Stress test folio splits by using the debugfs interface to target
a new, smaller folio order while running compaction at the same time.
This is dangerous at the moment as it's using a debugfs API which
requires two out-of-tree fixes [0] [1] which have already been
posted but not yet merged.

With these debugfs patches applied this test can now be used to
reproduce an issue which was only possible to reproduce by running
generic/447 twice with min order:

https://gist.github.com/mcgrof/d12f586ec6ebe32b2472b5d634c397df

This is designed to try to exacerbate races with folio splits incurred
by truncation and race that with compaction and writeback. This only
creates a crash with min order enabled, so for example with a 16k block
sized XFS test profile.

This also begs the question if something like MADV_NOHUGEPAGE might be
desirable from userspace, so to enable userspace to request splits when
possible.

If inspecting more closely, you'll want to enable on your kernel boot:

	dyndbg='file mm/huge_memory.c +p'

Since we want to race large folio splits we also augment the full test
output log $seqres.full with the test specific number of successful
splits from vmstat thp_split_page and thp_split_page_failed.

[0] https://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux.git/commit/?h=20240424-lbs&id=80f6df5037fd0ad560526af45bd7f4d779fe03f6
[1] https://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux.git/commit/?h=20240424-lbs&id=38f6fac5b4283ea48b1876fc56728f062168f8c3
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---

For now at least, to allow people to more easily reproduce the crash we're
discussing here:

https://lkml.kernel.org/r/Zi8aYA92pvjDY7d5@bombadil.infradead.org

I can clean this up based on Zorro's feedback after this. Posting this RFCv2
so to enable folks to more easily reproduce the issue and also the debugfs
issue that this uses.

 common/rc             |  20 ++++++++
 tests/generic/745     | 115 ++++++++++++++++++++++++++++++++++++++++++
 tests/generic/745.out |   2 +
 3 files changed, 137 insertions(+)
 create mode 100755 tests/generic/745
 create mode 100644 tests/generic/745.out
diff mbox series

Patch

diff --git a/common/rc b/common/rc
index d4432f5ce259..1eefb53aa84b 100644
--- a/common/rc
+++ b/common/rc
@@ -127,6 +127,26 @@  _require_compaction()
 	    _notrun "Need compaction enabled CONFIG_COMPACTION=y"
 	fi
 }
+
+# Requires CONFIG_DEBUGFS and truncation knobs
+SPLIT_DEBUGFS="/sys/kernel/debug/split_huge_pages"
+_require_split_debugfs()
+{
+       if [ ! -f $SPLIT_DEBUGFS ]; then
+           _notrun "Needs CONFIG_DEBUGFS and split_huge_pages"
+       fi
+}
+
+_split_huge_pages_file_full()
+{
+	local file=$1
+	local offset="0x0"
+	local len=$(printf "%x" $(stat --format='%s' $file))
+	local order="0"
+	local split_cmd="$file,$offset,0x${len},$order"
+	echo $split_cmd > $SPLIT_DEBUGFS
+}
+
 # Get hugepagesize in bytes
 _get_hugepagesize()
 {
diff --git a/tests/generic/745 b/tests/generic/745
new file mode 100755
index 000000000000..0c67bd990a2f
--- /dev/null
+++ b/tests/generic/745
@@ -0,0 +1,115 @@ 
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2024 Luis Chamberlain. All Rights Reserved.
+#
+# FS QA Test No. 745
+#
+# stress truncation + writeback + compaction
+#
+# This aims at trying to reproduce a difficult to reproduce bug found with
+# min order. The root cause lies in compaction racing with truncation on
+# min order:
+#
+# https://gist.github.com/mcgrof/d12f586ec6ebe32b2472b5d634c397df
+#
+# If you're enabling this and want to check underneath the hood you may want to
+# enable:
+#
+# dyndbg='file mm/huge_memory.c +p'
+#
+# We want to increase the rate of successful truncations + compaction racing,
+# so we want to increase the value of thp_split_page in $seqres.full.
+#
+# Our general goal here is to race with folio truncation + writeback and
+# compaction.
+
+. ./common/preamble
+
+# This is dangerous_fuzzers for now until we get the debugfs interface
+# this uses fixed. Patches for that have been posted but are still under
+# review.
+_begin_fstest long_rw stress soak smoketest dangerous_fuzzers
+
+# Overrides the default cleanup: leave the cwd, drop $tmp.* files, kill fsstress.
+_cleanup()
+{
+	cd /
+	rm -f $tmp.*
+	$KILLALL_PROG -9 fsstress &> /dev/null
+}
+
+# Source the shared helpers from common/filter.
+. ./common/filter
+
+# Real QA test starts here: declare prerequisites, then mkfs and mount the scratch fs.
+_supported_fs generic
+_require_test
+_require_scratch
+_require_split_debugfs
+_require_compaction
+_require_command "$KILLALL_PROG" "killall"
+
+echo "Silence is golden"
+
+_scratch_mkfs >>$seqres.full 2>&1
+_scratch_mount >> $seqres.full 2>&1
+
+nr_cpus=$((LOAD_FACTOR * 4))
+nr_ops=$((25000 * nr_cpus * TIME_FACTOR))
+
+fsstress_args=(-w -d $SCRATCH_MNT/test -n $nr_ops -p $nr_cpus)
+
+# Sentinel file: the background loops started later run for as long as it exists.
+runfile="$tmp.keep.running.loop"
+touch $runfile
+
+# The background ops are out of bounds, the goal is to race with fsstress.
+
+# Force folio splits where possible on every regular file under the fsstress dir;
+# this seems to be screaming for MADV_NOHUGEPAGE for large folios.
+while [ -e "$runfile" ]; do
+	find "$SCRATCH_MNT/test" -type f -print0 2>/dev/null | while IFS= read -r -d '' i; do
+		_split_huge_pages_file_full "$i" >/dev/null 2>&1
+	done
+	sleep 2
+done &
+split_huge_pages_files_pid=$!
+# Kick /proc/sys/vm/compact_memory every 10 seconds while the run-file exists.
+while [ -e "$runfile" ]; do
+	echo 1 > /proc/sys/vm/compact_memory
+	sleep 10
+done &
+compaction_pid=$!
+
+blocksize=$(_get_file_block_size $SCRATCH_MNT)
+export XFS_DIO_MIN=$((blocksize * 2))
+
+test -n "$SOAK_DURATION" && fsstress_args+=(--duration="$SOAK_DURATION")
+# Baseline THP split counters; the post-run delta is logged to $seqres.full.
+split_count_before=0
+split_count_failed_before=0
+
+if grep -q thp_split_page /proc/vmstat; then
+	split_count_before=$(awk '$1 == "thp_split_page" {print $2}' /proc/vmstat)
+	split_count_failed_before=$(awk '$1 == "thp_split_page_failed" {print $2}' /proc/vmstat)
+else
+	echo "no thp_split_page in /proc/vmstat" >> $seqres.full
+fi
+
+$FSSTRESS_PROG $FSSTRESS_AVOID "${fsstress_args[@]}" >> $seqres.full
+# Removing the run-file tells both background loops to exit; wait reaps them.
+rm -f $runfile
+wait > /dev/null 2>&1
+
+if grep -q thp_split_page /proc/vmstat; then
+	split_count_after=$(awk '$1 == "thp_split_page" {print $2}' /proc/vmstat)
+	split_count_failed_after=$(awk '$1 == "thp_split_page_failed" {print $2}' /proc/vmstat)
+	thp_split_page=$((split_count_after - split_count_before))
+	thp_split_page_failed=$((split_count_failed_after - split_count_failed_before))
+
+	echo "vmstat thp_split_page: $thp_split_page" >> $seqres.full
+	echo "vmstat thp_split_page_failed: $thp_split_page_failed" >> $seqres.full
+fi
+
+status=0
+exit
diff --git a/tests/generic/745.out b/tests/generic/745.out
new file mode 100644
index 000000000000..fce6b7f5489d
--- /dev/null
+++ b/tests/generic/745.out
@@ -0,0 +1,2 @@ 
+QA output created by 745
+Silence is golden