Message ID | 51fdd212d139d203506cc2ee18abb362e5859e3e.1696196382.git.alison.schofield@intel.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Vishal Verma |
Headers | show |
Series | Support poison list retrieval | expand |
On Sun, 2023-10-01 at 15:31 -0700, alison.schofield@intel.com wrote:
> From: Alison Schofield <alison.schofield@intel.com>
>
> Exercise cxl list, libcxl, and driver pieces of the get poison list
> pathway. Inject and clear poison using debugfs and use cxl-cli to
> read the poison list by memdev and by region.
>
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> ---
>  test/cxl-poison.sh | 103 +++++++++++++++++++++++++++++++++++++++++++++
>  test/meson.build   |   2 +
>  2 files changed, 105 insertions(+)
>  create mode 100644 test/cxl-poison.sh
>
> diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh
> new file mode 100644
> index 000000000000..3c424532da7b
> --- /dev/null
> +++ b/test/cxl-poison.sh
> @@ -0,0 +1,103 @@
> +#!/bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (C) 2022 Intel Corporation. All rights reserved.
> +
> +. $(dirname $0)/common
> +
> +rc=77
> +
> +set -ex
> +
> +trap 'err $LINENO' ERR
> +
> +check_prereq "jq"
> +
> +modprobe -r cxl_test
> +modprobe cxl_test
> +cxl list

"$CXL" list

Also should reset rc from 77 so that it doesn't show as skipped on a
real failure.

> +
> +# THEORY OF OPERATION: Exercise cxl-cli and cxl driver ability to
> +# inject, clear, and get the poison list. Do it by memdev and by region.
> +# Based on current cxl-test topology.
> +
> +create_region()
> +{
> +	region=$($CXL create-region -d $decoder -m $memdevs | jq -r ".region")
> +
> +	if [[ ! $region ]]; then
> +		echo "create-region failed for $decoder"
> +		err "$LINENO"
> +	fi
> +}
> +
> +setup_x2_region()
> +{
> +	# Find an x2 decoder
> +	decoder=$($CXL list -b cxl_test -D -d root | jq -r ".[] |

I suspect this comes from another test, but test/common defines a
$cxl_test_bus that can be used here.

> +		select(.pmem_capable == true) |
> +		select(.nr_targets == 2) |
> +		.decoder")
> +
> +	# Find a memdev for each host-bridge interleave position
> +	port_dev0=$($CXL list -T -d $decoder | jq -r ".[] |
> +		.targets | .[] | select(.position == 0) | .target")
> +	port_dev1=$($CXL list -T -d $decoder | jq -r ".[] |
> +		.targets | .[] | select(.position == 1) | .target")
> +	mem0=$($CXL list -M -p $port_dev0 | jq -r ".[0].memdev")
> +	mem1=$($CXL list -M -p $port_dev1 | jq -r ".[0].memdev")
> +	memdevs="$mem0 $mem1"
> +}
> +
> +find_media_errors()
> +{
> +	nr=$(echo $json | jq -r ".nr_poison_records")

No need for echo and pipe -

   nr="$(jq -r ".nr_poison_records" <<< "$json")"

Also, this currently assumes that a global '$json' will be available
and up to date. In this test the way it is called, this will always be
true, but it would be cleaner to actually pass $json to
find_media_errors() each time, and in here, do something like

   local json="$1"

> +	if [[ $nr -ne $NR_ERRS ]]; then

If using the bash variant, [[ ]], this should be

   if [[ $nr != $NR_ERRS ]]; then

> +		echo "$mem: $NR_ERRS poison records expected, $nr found"
> +		err "$LINENO"
> +	fi
> +}
> +
> +# Turn Tracing ON
> +# Note that 'cxl list --poison' does toggle the tracing, so
> +# turning it on here is to enable the test user to view inject
> +# and clear trace events, if they wish.
> +echo 1 > /sys/kernel/tracing/events/cxl/cxl_poison/enable
> +
> +# Using DEBUGFS:
> +# When cxl-cli support for inject and clear arrives, replace
> +# the writes to /sys/kernel/debug with the new cxl commands
> +# that wrap them.
> +
> +# Poison by memdev: inject, list, clear, list.
> +# Inject 2 into pmem and 2 into ram partition.
> +echo 0x40000000 > /sys/kernel/debug/cxl/mem1/inject_poison
> +echo 0x40001000 > /sys/kernel/debug/cxl/mem1/inject_poison
> +echo 0x0 > /sys/kernel/debug/cxl/mem1/inject_poison
> +echo 0x600 > /sys/kernel/debug/cxl/mem1/inject_poison
> +NR_ERRS=4
> +json=$("$CXL" list -m mem1 --poison | jq -r '.[].poison')
> +find_media_errors
> +echo 0x40000000 > /sys/kernel/debug/cxl/mem1/clear_poison
> +echo 0x40001000 > /sys/kernel/debug/cxl/mem1/clear_poison
> +echo 0x0 > /sys/kernel/debug/cxl/mem1/clear_poison
> +echo 0x600 > /sys/kernel/debug/cxl/mem1/clear_poison
> +NR_ERRS=0
> +json=$("$CXL" list -m mem1 --poison | jq -r '.[].poison')
> +find_media_errors

For all of the above debugfs writes -

mem1 is hard-coded - is this supposed to be "$mem1" from when
setup_x2_region() was done (similar to how the region stuff is done
below)?

> +
> +# Poison by region: inject, list, clear, list.
> +setup_x2_region
> +create_region
> +echo 0x40000000 > /sys/kernel/debug/cxl/"$mem0"/inject_poison
> +echo 0x40000000 > /sys/kernel/debug/cxl/"$mem1"/inject_poison
> +NR_ERRS=2
> +json=$("$CXL" list -r "$region" --poison | jq -r '.[].poison')
> +find_media_errors
> +echo 0x40000000 > /sys/kernel/debug/cxl/"$mem0"/clear_poison
> +echo 0x40000000 > /sys/kernel/debug/cxl/"$mem1"/clear_poison

It might be nice to create a couple of helpers -

inject_poison_sysfs() {
	memdev="$1"
	addr="$2
	...
}

And similarly

clear_poison_sysfs()...

> +NR_ERRS=0
> +json=$("$CXL" list -r "$region" --poison | jq -r '.[].poison')
> +find_media_errors
> +
> +check_dmesg "$LINENO"
> +modprobe -r cxl-test
> diff --git a/test/meson.build b/test/meson.build
> index 224adaf41fcc..2706fa5d633c 100644
> --- a/test/meson.build
> +++ b/test/meson.build
> @@ -157,6 +157,7 @@ cxl_create_region = find_program('cxl-create-region.sh')
>  cxl_xor_region = find_program('cxl-xor-region.sh')
>  cxl_update_firmware = find_program('cxl-update-firmware.sh')
>  cxl_events = find_program('cxl-events.sh')
> +cxl_poison = find_program('cxl-poison.sh')
>
>  tests = [
>    [ 'libndctl', libndctl, 'ndctl' ],
> @@ -186,6 +187,7 @@ tests = [
>    [ 'cxl-create-region.sh', cxl_create_region, 'cxl' ],
>    [ 'cxl-xor-region.sh', cxl_xor_region, 'cxl' ],
>    [ 'cxl-events.sh', cxl_events, 'cxl' ],
> +  [ 'cxl-poison.sh', cxl_poison, 'cxl' ],
>  ]
>
>  if get_option('destructive').enabled()
On Wed, Nov 15, 2023 at 02:13:48AM -0800, Vishal Verma wrote:
> On Sun, 2023-10-01 at 15:31 -0700, alison.schofield@intel.com wrote:
> > From: Alison Schofield <alison.schofield@intel.com>
> >
> > Exercise cxl list, libcxl, and driver pieces of the get poison list
> > pathway. Inject and clear poison using debugfs and use cxl-cli to
> > read the poison list by memdev and by region.
> >
> > Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> > ---

snip

> > +cxl list
>
> "$CXL" list
>
> Also should reset rc from 77 so that it doesn't show as skipped on a
> real failure.

Done.

>

snip

> > +setup_x2_region()
> > +{
> > +	# Find an x2 decoder
> > +	decoder=$($CXL list -b cxl_test -D -d root | jq -r ".[] |
>
> I suspect this comes from another test, but test/common defines a
> $cxl_test_bus that can be used here.

Done.

>

snip

> > +find_media_errors()
> > +{
> > +	nr=$(echo $json | jq -r ".nr_poison_records")
>
> No need for echo and pipe -
>
>    nr="$(jq -r ".nr_poison_records" <<< "$json")"

Done

>
> Also, this currently assumes that a global '$json' will be available
> and up to date. In this test the way it is called, this will always be
> true, but it would be cleaner to actually pass $json to
> find_media_errors() each time, and in here, do something like
>
>    local json="$1"
>

Done

> > +	if [[ $nr -ne $NR_ERRS ]]; then
>
> If using the bash variant, [[ ]], this should be
>
>    if [[ $nr != $NR_ERRS ]]; then
>

Done

> > +		echo "$mem: $NR_ERRS poison records expected, $nr found"
> > +		err "$LINENO"
> > +	fi
> > +}
> > +

snip

> > +find_media_errors
>
> For all of the above debugfs writes -
>
> mem1 is hard-coded - is this supposed to be "$mem1" from when
> setup_x2_region() was done (similar to how the region stuff is done
> below)?

It was intentionally hardcoded based on what I expect in the cxl-test
topology. Changed it in v3 to look up a memdev.

>
> > +
> > +# Poison by region: inject, list, clear, list.
> > +setup_x2_region
> > +create_region
> > +echo 0x40000000 > /sys/kernel/debug/cxl/"$mem0"/inject_poison
> > +echo 0x40000000 > /sys/kernel/debug/cxl/"$mem1"/inject_poison
> > +NR_ERRS=2
> > +json=$("$CXL" list -r "$region" --poison | jq -r '.[].poison')
> > +find_media_errors
> > +echo 0x40000000 > /sys/kernel/debug/cxl/"$mem0"/clear_poison
> > +echo 0x40000000 > /sys/kernel/debug/cxl/"$mem1"/clear_poison
>
> It might be nice to create a couple of helpers -
>
> inject_poison_sysfs() {
> 	memdev="$1"
> 	addr="$2
> 	...
> }
>
> And similarly
>
> clear_poison_sysfs()...
>

Done

Thanks for the review Vishal, especially the bash & jq wisdom!

>
>
diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh
new file mode 100644
index 000000000000..3c424532da7b
--- /dev/null
+++ b/test/cxl-poison.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2022 Intel Corporation. All rights reserved.
+
+. $(dirname $0)/common
+
+rc=77
+
+set -ex
+
+trap 'err $LINENO' ERR
+
+check_prereq "jq"
+
+modprobe -r cxl_test
+modprobe cxl_test
+cxl list
+
+# THEORY OF OPERATION: Exercise cxl-cli and cxl driver ability to
+# inject, clear, and get the poison list. Do it by memdev and by region.
+# Based on current cxl-test topology.
+
+create_region()
+{
+	region=$($CXL create-region -d $decoder -m $memdevs | jq -r ".region")
+
+	if [[ ! $region ]]; then
+		echo "create-region failed for $decoder"
+		err "$LINENO"
+	fi
+}
+
+setup_x2_region()
+{
+	# Find an x2 decoder
+	decoder=$($CXL list -b cxl_test -D -d root | jq -r ".[] |
+		select(.pmem_capable == true) |
+		select(.nr_targets == 2) |
+		.decoder")
+
+	# Find a memdev for each host-bridge interleave position
+	port_dev0=$($CXL list -T -d $decoder | jq -r ".[] |
+		.targets | .[] | select(.position == 0) | .target")
+	port_dev1=$($CXL list -T -d $decoder | jq -r ".[] |
+		.targets | .[] | select(.position == 1) | .target")
+	mem0=$($CXL list -M -p $port_dev0 | jq -r ".[0].memdev")
+	mem1=$($CXL list -M -p $port_dev1 | jq -r ".[0].memdev")
+	memdevs="$mem0 $mem1"
+}
+
+find_media_errors()
+{
+	nr=$(echo $json | jq -r ".nr_poison_records")
+	if [[ $nr -ne $NR_ERRS ]]; then
+		echo "$mem: $NR_ERRS poison records expected, $nr found"
+		err "$LINENO"
+	fi
+}
+
+# Turn Tracing ON
+# Note that 'cxl list --poison' does toggle the tracing, so
+# turning it on here is to enable the test user to view inject
+# and clear trace events, if they wish.
+echo 1 > /sys/kernel/tracing/events/cxl/cxl_poison/enable
+
+# Using DEBUGFS:
+# When cxl-cli support for inject and clear arrives, replace
+# the writes to /sys/kernel/debug with the new cxl commands
+# that wrap them.
+
+# Poison by memdev: inject, list, clear, list.
+# Inject 2 into pmem and 2 into ram partition.
+echo 0x40000000 > /sys/kernel/debug/cxl/mem1/inject_poison
+echo 0x40001000 > /sys/kernel/debug/cxl/mem1/inject_poison
+echo 0x0 > /sys/kernel/debug/cxl/mem1/inject_poison
+echo 0x600 > /sys/kernel/debug/cxl/mem1/inject_poison
+NR_ERRS=4
+json=$("$CXL" list -m mem1 --poison | jq -r '.[].poison')
+find_media_errors
+echo 0x40000000 > /sys/kernel/debug/cxl/mem1/clear_poison
+echo 0x40001000 > /sys/kernel/debug/cxl/mem1/clear_poison
+echo 0x0 > /sys/kernel/debug/cxl/mem1/clear_poison
+echo 0x600 > /sys/kernel/debug/cxl/mem1/clear_poison
+NR_ERRS=0
+json=$("$CXL" list -m mem1 --poison | jq -r '.[].poison')
+find_media_errors
+
+# Poison by region: inject, list, clear, list.
+setup_x2_region
+create_region
+echo 0x40000000 > /sys/kernel/debug/cxl/"$mem0"/inject_poison
+echo 0x40000000 > /sys/kernel/debug/cxl/"$mem1"/inject_poison
+NR_ERRS=2
+json=$("$CXL" list -r "$region" --poison | jq -r '.[].poison')
+find_media_errors
+echo 0x40000000 > /sys/kernel/debug/cxl/"$mem0"/clear_poison
+echo 0x40000000 > /sys/kernel/debug/cxl/"$mem1"/clear_poison
+NR_ERRS=0
+json=$("$CXL" list -r "$region" --poison | jq -r '.[].poison')
+find_media_errors
+
+check_dmesg "$LINENO"
+modprobe -r cxl-test
diff --git a/test/meson.build b/test/meson.build
index 224adaf41fcc..2706fa5d633c 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -157,6 +157,7 @@ cxl_create_region = find_program('cxl-create-region.sh')
 cxl_xor_region = find_program('cxl-xor-region.sh')
 cxl_update_firmware = find_program('cxl-update-firmware.sh')
 cxl_events = find_program('cxl-events.sh')
+cxl_poison = find_program('cxl-poison.sh')
 
 tests = [
   [ 'libndctl', libndctl, 'ndctl' ],
@@ -186,6 +187,7 @@ tests = [
   [ 'cxl-create-region.sh', cxl_create_region, 'cxl' ],
   [ 'cxl-xor-region.sh', cxl_xor_region, 'cxl' ],
   [ 'cxl-events.sh', cxl_events, 'cxl' ],
+  [ 'cxl-poison.sh', cxl_poison, 'cxl' ],
 ]
 
 if get_option('destructive').enabled()