diff mbox series

[blktests,v2,2/2] md: add regression test for "md/md-bitmap: fix writing non bitmap pages"

Message ID 20240624104620.2156041-3-ofir.gal@volumez.com (mailing list archive)
State New, archived
Headers show
Series md: add regression test for "md/md-bitmap: fix writing non bitmap pages" | expand

Commit Message

Ofir Gal June 24, 2024, 10:46 a.m. UTC
A bug in md-bitmap has been discovered by setting up a md raid on top of
nvme-tcp devices that has optimal io size larger than the allocated
bitmap.

The following test reproduce the bug by setting up a md-raid on top of
nvme-tcp device over ram device that sets the optimal io size by using
dm-stripe.

Signed-off-by: Ofir Gal <ofir.gal@volumez.com>
---
 common/brd       | 28 +++++++++++++++
 tests/md/001     | 88 ++++++++++++++++++++++++++++++++++++++++++++++++
 tests/md/001.out |  2 ++
 tests/md/rc      | 12 +++++++
 4 files changed, 130 insertions(+)
 create mode 100644 common/brd
 create mode 100755 tests/md/001
 create mode 100644 tests/md/001.out
 create mode 100644 tests/md/rc

Comments

Daniel Wagner June 25, 2024, 3:01 p.m. UTC | #1
On Mon, Jun 24, 2024 at 01:46:18PM GMT, Ofir Gal wrote:
> +#restrict test to nvme-tcp only
> +nvme_trtype=tcp
> +nvmet_blkdev_type="device"
> +
> +requires() {
> +	# Require dm-stripe
> +	_have_program dmsetup
> +	_have_driver dm-mod
> +
> +	_require_nvme_trtype tcp
> +	_have_brd
> +}
> +
> +# Sets up a brd device of 1G with optimal-io-size of 256K
> +setup_underlying_device() {
> +	if ! _init_brd rd_size=1048576 rd_nr=1; then
> +		return 1
> +	fi
> +
> +	dmsetup create ram0_big_optio --table \
> +		"0 $(blockdev --getsz /dev/ram0) striped 1 512 /dev/ram0 0"
> +}
> +
> +cleanup_underlying_device() {
> +	dmsetup remove ram0_big_optio
> +	_cleanup_brd
> +}
> +
> +# Sets up a local host nvme over tcp
> +setup_nvme_over_tcp() {
> +	_setup_nvmet
> +
> +	local port
> +	port="$(_create_nvmet_port "${nvme_trtype}")"
> +
> +	_create_nvmet_subsystem "blktests-subsystem-0" "/dev/mapper/ram0_big_optio"
> +	_add_nvmet_subsys_to_port "${port}" "blktests-subsystem-0"

Use the defaults from blktests, e.g. ${def_subsysnqn}"

> +
> +	_create_nvmet_host "blktests-subsystem-0" "${def_hostnqn}"
> +
> +	_nvme_connect_subsys --subsysnqn "blktests-subsystem-0"
> +
> +	local nvmedev
> +	nvmedev=$(_find_nvme_dev "blktests-subsystem-0")

here too

> +	echo "${nvmedev}"
> +}
> +
> +cleanup_nvme_over_tcp() {
> +	local nvmedev=$1
> +	_nvme_disconnect_ctrl "${nvmedev}"
> +	_nvmet_target_cleanup --subsysnqn "blktests-subsystem-0"

same here

> +}
> +
> +test() {
> +	echo "Running ${TEST_NAME}"
> +
> +	setup_underlying_device
> +	local nvmedev
> +	nvmedev=$(setup_nvme_over_tcp)
> +
> +	# Hangs here without the fix
> +	mdadm --quiet --create /dev/md/blktests_md --level=1 --bitmap=internal \
> +		--bitmap-chunk=1024K --assume-clean --run --raid-devices=2 \
> +		/dev/"${nvmedev}"n1 missing

Instead hard coding the namespace ID, this should be made a bit more
robust by looking it up with _find_nvme_ns.
Ofir Gal July 16, 2024, 11:49 a.m. UTC | #2
On 6/25/24 18:01, Daniel Wagner wrote:
> On Mon, Jun 24, 2024 at 01:46:18PM GMT, Ofir Gal wrote:
>> +#restrict test to nvme-tcp only
>> +nvme_trtype=tcp
>> +nvmet_blkdev_type="device"
>> +
>> +requires() {
>> +	# Require dm-stripe
>> +	_have_program dmsetup
>> +	_have_driver dm-mod
>> +
>> +	_require_nvme_trtype tcp
>> +	_have_brd
>> +}
>> +
>> +# Sets up a brd device of 1G with optimal-io-size of 256K
>> +setup_underlying_device() {
>> +	if ! _init_brd rd_size=1048576 rd_nr=1; then
>> +		return 1
>> +	fi
>> +
>> +	dmsetup create ram0_big_optio --table \
>> +		"0 $(blockdev --getsz /dev/ram0) striped 1 512 /dev/ram0 0"
>> +}
>> +
>> +cleanup_underlying_device() {
>> +	dmsetup remove ram0_big_optio
>> +	_cleanup_brd
>> +}
>> +
>> +# Sets up a local host nvme over tcp
>> +setup_nvme_over_tcp() {
>> +	_setup_nvmet
>> +
>> +	local port
>> +	port="$(_create_nvmet_port "${nvme_trtype}")"
>> +
>> +	_create_nvmet_subsystem "blktests-subsystem-0" "/dev/mapper/ram0_big_optio"
>> +	_add_nvmet_subsys_to_port "${port}" "blktests-subsystem-0"
> Use the defaults from blktests, e.g. ${def_subsysnqn}"
>
>> +
>> +	_create_nvmet_host "blktests-subsystem-0" "${def_hostnqn}"
>> +
>> +	_nvme_connect_subsys --subsysnqn "blktests-subsystem-0"
>> +
>> +	local nvmedev
>> +	nvmedev=$(_find_nvme_dev "blktests-subsystem-0")
> here too
>
>> +	echo "${nvmedev}"
>> +}
>> +
>> +cleanup_nvme_over_tcp() {
>> +	local nvmedev=$1
>> +	_nvme_disconnect_ctrl "${nvmedev}"
>> +	_nvmet_target_cleanup --subsysnqn "blktests-subsystem-0"
> same here
>
>> +}
>> +
>> +test() {
>> +	echo "Running ${TEST_NAME}"
>> +
>> +	setup_underlying_device
>> +	local nvmedev
>> +	nvmedev=$(setup_nvme_over_tcp)
>> +
>> +	# Hangs here without the fix
>> +	mdadm --quiet --create /dev/md/blktests_md --level=1 --bitmap=internal \
>> +		--bitmap-chunk=1024K --assume-clean --run --raid-devices=2 \
>> +		/dev/"${nvmedev}"n1 missing
> Instead hard coding the namespace ID, this should be made a bit more
> robust by looking it up with _find_nvme_ns.
Thanks, applied comments to v3.
diff mbox series

Patch

diff --git a/common/brd b/common/brd
new file mode 100644
index 0000000..31e964f
--- /dev/null
+++ b/common/brd
@@ -0,0 +1,28 @@ 
+#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0+
+# Copyright (C) 2024 Ofir Gal
+#
+# brd helper functions
+
+. common/shellcheck
+
+_have_brd() {
+	_have_module brd
+}
+
+_init_brd() {
+	# _have_brd loads brd, we need to wait a bit for brd to be not in use in
+	# order to reload it
+	sleep 0.2
+
+	if ! modprobe -r brd || ! modprobe brd "$@" ; then
+		echo "failed to reload brd with args: $*"
+		return 1
+	fi
+
+	return 0
+}
+
+_cleanup_brd() {
+	modprobe -r brd
+}
diff --git a/tests/md/001 b/tests/md/001
new file mode 100755
index 0000000..fc23ff7
--- /dev/null
+++ b/tests/md/001
@@ -0,0 +1,88 @@ 
+#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0+
+# Copyright (C) 2024 Ofir Gal
+#
+# The bug is "visible" only when the underlying device of the raid is a network
+# block device that utilize MSG_SPLICE_PAGES. nvme-tcp is used as the network device.
+#
+# Regression test for patch "md/md-bitmap: fix writing non bitmap pages" and
+# for patch "nvme-tcp: use sendpages_ok() instead of sendpage_ok()"
+
+. tests/md/rc
+. common/brd
+. common/nvme
+
+DESCRIPTION="Raid with bitmap on tcp nvmet with opt-io-size over bitmap size"
+QUICK=1
+
+#restrict test to nvme-tcp only
+nvme_trtype=tcp
+nvmet_blkdev_type="device"
+
+requires() {
+	# Require dm-stripe
+	_have_program dmsetup
+	_have_driver dm-mod
+
+	_require_nvme_trtype tcp
+	_have_brd
+}
+
+# Sets up a brd device of 1G with optimal-io-size of 256K
+setup_underlying_device() {
+	if ! _init_brd rd_size=1048576 rd_nr=1; then
+		return 1
+	fi
+
+	dmsetup create ram0_big_optio --table \
+		"0 $(blockdev --getsz /dev/ram0) striped 1 512 /dev/ram0 0"
+}
+
+cleanup_underlying_device() {
+	dmsetup remove ram0_big_optio
+	_cleanup_brd
+}
+
+# Sets up a local host nvme over tcp
+setup_nvme_over_tcp() {
+	_setup_nvmet
+
+	local port
+	port="$(_create_nvmet_port "${nvme_trtype}")"
+
+	_create_nvmet_subsystem "blktests-subsystem-0" "/dev/mapper/ram0_big_optio"
+	_add_nvmet_subsys_to_port "${port}" "blktests-subsystem-0"
+
+	_create_nvmet_host "blktests-subsystem-0" "${def_hostnqn}"
+
+	_nvme_connect_subsys --subsysnqn "blktests-subsystem-0"
+
+	local nvmedev
+	nvmedev=$(_find_nvme_dev "blktests-subsystem-0")
+	echo "${nvmedev}"
+}
+
+cleanup_nvme_over_tcp() {
+	local nvmedev=$1
+	_nvme_disconnect_ctrl "${nvmedev}"
+	_nvmet_target_cleanup --subsysnqn "blktests-subsystem-0"
+}
+
+test() {
+	echo "Running ${TEST_NAME}"
+
+	setup_underlying_device
+	local nvmedev
+	nvmedev=$(setup_nvme_over_tcp)
+
+	# Hangs here without the fix
+	mdadm --quiet --create /dev/md/blktests_md --level=1 --bitmap=internal \
+		--bitmap-chunk=1024K --assume-clean --run --raid-devices=2 \
+		/dev/"${nvmedev}"n1 missing
+
+	mdadm --quiet --stop /dev/md/blktests_md
+	cleanup_nvme_over_tcp "${nvmedev}"
+	cleanup_underlying_device
+
+	echo "Test complete"
+}
diff --git a/tests/md/001.out b/tests/md/001.out
new file mode 100644
index 0000000..edd2ced
--- /dev/null
+++ b/tests/md/001.out
@@ -0,0 +1,2 @@ 
+Running md/001
+Test complete
diff --git a/tests/md/rc b/tests/md/rc
new file mode 100644
index 0000000..d492579
--- /dev/null
+++ b/tests/md/rc
@@ -0,0 +1,12 @@ 
+#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0+
+# Copyright (C) 2024 Ofir Gal
+#
+# Tests for md raid
+
+. common/rc
+
+group_requires() {
+	_have_root
+	_have_program mdadm
+}