Message ID | 20240624104620.2156041-3-ofir.gal@volumez.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | md: add regression test for "md/md-bitmap: fix writing non bitmap pages" | expand |
On Mon, Jun 24, 2024 at 01:46:18PM GMT, Ofir Gal wrote: > +#restrict test to nvme-tcp only > +nvme_trtype=tcp > +nvmet_blkdev_type="device" > + > +requires() { > + # Require dm-stripe > + _have_program dmsetup > + _have_driver dm-mod > + > + _require_nvme_trtype tcp > + _have_brd > +} > + > +# Sets up a brd device of 1G with optimal-io-size of 256K > +setup_underlying_device() { > + if ! _init_brd rd_size=1048576 rd_nr=1; then > + return 1 > + fi > + > + dmsetup create ram0_big_optio --table \ > + "0 $(blockdev --getsz /dev/ram0) striped 1 512 /dev/ram0 0" > +} > + > +cleanup_underlying_device() { > + dmsetup remove ram0_big_optio > + _cleanup_brd > +} > + > +# Sets up a local host nvme over tcp > +setup_nvme_over_tcp() { > + _setup_nvmet > + > + local port > + port="$(_create_nvmet_port "${nvme_trtype}")" > + > + _create_nvmet_subsystem "blktests-subsystem-0" "/dev/mapper/ram0_big_optio" > + _add_nvmet_subsys_to_port "${port}" "blktests-subsystem-0" Use the defaults from blktests, e.g. ${def_subsysnqn}" > + > + _create_nvmet_host "blktests-subsystem-0" "${def_hostnqn}" > + > + _nvme_connect_subsys --subsysnqn "blktests-subsystem-0" > + > + local nvmedev > + nvmedev=$(_find_nvme_dev "blktests-subsystem-0") here too > + echo "${nvmedev}" > +} > + > +cleanup_nvme_over_tcp() { > + local nvmedev=$1 > + _nvme_disconnect_ctrl "${nvmedev}" > + _nvmet_target_cleanup --subsysnqn "blktests-subsystem-0" same here > +} > + > +test() { > + echo "Running ${TEST_NAME}" > + > + setup_underlying_device > + local nvmedev > + nvmedev=$(setup_nvme_over_tcp) > + > + # Hangs here without the fix > + mdadm --quiet --create /dev/md/blktests_md --level=1 --bitmap=internal \ > + --bitmap-chunk=1024K --assume-clean --run --raid-devices=2 \ > + /dev/"${nvmedev}"n1 missing Instead hard coding the namespace ID, this should be made a bit more robust by looking it up with _find_nvme_ns.
On 6/25/24 18:01, Daniel Wagner wrote: > On Mon, Jun 24, 2024 at 01:46:18PM GMT, Ofir Gal wrote: >> +#restrict test to nvme-tcp only >> +nvme_trtype=tcp >> +nvmet_blkdev_type="device" >> + >> +requires() { >> + # Require dm-stripe >> + _have_program dmsetup >> + _have_driver dm-mod >> + >> + _require_nvme_trtype tcp >> + _have_brd >> +} >> + >> +# Sets up a brd device of 1G with optimal-io-size of 256K >> +setup_underlying_device() { >> + if ! _init_brd rd_size=1048576 rd_nr=1; then >> + return 1 >> + fi >> + >> + dmsetup create ram0_big_optio --table \ >> + "0 $(blockdev --getsz /dev/ram0) striped 1 512 /dev/ram0 0" >> +} >> + >> +cleanup_underlying_device() { >> + dmsetup remove ram0_big_optio >> + _cleanup_brd >> +} >> + >> +# Sets up a local host nvme over tcp >> +setup_nvme_over_tcp() { >> + _setup_nvmet >> + >> + local port >> + port="$(_create_nvmet_port "${nvme_trtype}")" >> + >> + _create_nvmet_subsystem "blktests-subsystem-0" "/dev/mapper/ram0_big_optio" >> + _add_nvmet_subsys_to_port "${port}" "blktests-subsystem-0" > Use the defaults from blktests, e.g. 
${def_subsysnqn}" > >> + >> + _create_nvmet_host "blktests-subsystem-0" "${def_hostnqn}" >> + >> + _nvme_connect_subsys --subsysnqn "blktests-subsystem-0" >> + >> + local nvmedev >> + nvmedev=$(_find_nvme_dev "blktests-subsystem-0") > here too > >> + echo "${nvmedev}" >> +} >> + >> +cleanup_nvme_over_tcp() { >> + local nvmedev=$1 >> + _nvme_disconnect_ctrl "${nvmedev}" >> + _nvmet_target_cleanup --subsysnqn "blktests-subsystem-0" > same here > >> +} >> + >> +test() { >> + echo "Running ${TEST_NAME}" >> + >> + setup_underlying_device >> + local nvmedev >> + nvmedev=$(setup_nvme_over_tcp) >> + >> + # Hangs here without the fix >> + mdadm --quiet --create /dev/md/blktests_md --level=1 --bitmap=internal \ >> + --bitmap-chunk=1024K --assume-clean --run --raid-devices=2 \ >> + /dev/"${nvmedev}"n1 missing > Instead hard coding the namespace ID, this should be made a bit more > robust by looking it up with _find_nvme_ns. Thanks, applied comments to v3.
diff --git a/common/brd b/common/brd new file mode 100644 index 0000000..31e964f --- /dev/null +++ b/common/brd @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-3.0+ +# Copyright (C) 2024 Ofir Gal +# +# brd helper functions + +. common/shellcheck + +_have_brd() { + _have_module brd +} + +_init_brd() { + # _have_brd loads brd, we need to wait a bit for brd to be not in use in + # order to reload it + sleep 0.2 + + if ! modprobe -r brd || ! modprobe brd "$@" ; then + echo "failed to reload brd with args: $*" + return 1 + fi + + return 0 +} + +_cleanup_brd() { + modprobe -r brd +} diff --git a/tests/md/001 b/tests/md/001 new file mode 100755 index 0000000..fc23ff7 --- /dev/null +++ b/tests/md/001 @@ -0,0 +1,88 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-3.0+ +# Copyright (C) 2024 Ofir Gal +# +# The bug is "visible" only when the underlying device of the raid is a network +# block device that utilize MSG_SPLICE_PAGES. nvme-tcp is used as the network device. +# +# Regression test for patch "md/md-bitmap: fix writing non bitmap pages" and +# for patch "nvme-tcp: use sendpages_ok() instead of sendpage_ok()" + +. tests/md/rc +. common/brd +. common/nvme + +DESCRIPTION="Raid with bitmap on tcp nvmet with opt-io-size over bitmap size" +QUICK=1 + +#restrict test to nvme-tcp only +nvme_trtype=tcp +nvmet_blkdev_type="device" + +requires() { + # Require dm-stripe + _have_program dmsetup + _have_driver dm-mod + + _require_nvme_trtype tcp + _have_brd +} + +# Sets up a brd device of 1G with optimal-io-size of 256K +setup_underlying_device() { + if ! 
_init_brd rd_size=1048576 rd_nr=1; then + return 1 + fi + + dmsetup create ram0_big_optio --table \ + "0 $(blockdev --getsz /dev/ram0) striped 1 512 /dev/ram0 0" +} + +cleanup_underlying_device() { + dmsetup remove ram0_big_optio + _cleanup_brd +} + +# Sets up a local host nvme over tcp +setup_nvme_over_tcp() { + _setup_nvmet + + local port + port="$(_create_nvmet_port "${nvme_trtype}")" + + _create_nvmet_subsystem "blktests-subsystem-0" "/dev/mapper/ram0_big_optio" + _add_nvmet_subsys_to_port "${port}" "blktests-subsystem-0" + + _create_nvmet_host "blktests-subsystem-0" "${def_hostnqn}" + + _nvme_connect_subsys --subsysnqn "blktests-subsystem-0" + + local nvmedev + nvmedev=$(_find_nvme_dev "blktests-subsystem-0") + echo "${nvmedev}" +} + +cleanup_nvme_over_tcp() { + local nvmedev=$1 + _nvme_disconnect_ctrl "${nvmedev}" + _nvmet_target_cleanup --subsysnqn "blktests-subsystem-0" +} + +test() { + echo "Running ${TEST_NAME}" + + setup_underlying_device + local nvmedev + nvmedev=$(setup_nvme_over_tcp) + + # Hangs here without the fix + mdadm --quiet --create /dev/md/blktests_md --level=1 --bitmap=internal \ + --bitmap-chunk=1024K --assume-clean --run --raid-devices=2 \ + /dev/"${nvmedev}"n1 missing + + mdadm --quiet --stop /dev/md/blktests_md + cleanup_nvme_over_tcp "${nvmedev}" + cleanup_underlying_device + + echo "Test complete" +} diff --git a/tests/md/001.out b/tests/md/001.out new file mode 100644 index 0000000..edd2ced --- /dev/null +++ b/tests/md/001.out @@ -0,0 +1,2 @@ +Running md/001 +Test complete diff --git a/tests/md/rc b/tests/md/rc new file mode 100644 index 0000000..d492579 --- /dev/null +++ b/tests/md/rc @@ -0,0 +1,12 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-3.0+ +# Copyright (C) 2024 Ofir Gal +# +# Tests for md raid + +. common/rc + +group_requires() { + _have_root + _have_program mdadm +}
A bug in md-bitmap has been discovered by setting up an md raid on top of nvme-tcp devices that have an optimal io size larger than the allocated bitmap. The following test reproduces the bug by setting up an md-raid on top of an nvme-tcp device over a ram device that sets the optimal io size by using dm-stripe. Signed-off-by: Ofir Gal <ofir.gal@volumez.com> --- common/brd | 28 +++++++++++++++ tests/md/001 | 88 ++++++++++++++++++++++++++++++++++++++++++++++++ tests/md/001.out | 2 ++ tests/md/rc | 12 +++++++ 4 files changed, 130 insertions(+) create mode 100644 common/brd create mode 100755 tests/md/001 create mode 100644 tests/md/001.out create mode 100644 tests/md/rc