diff mbox series

[ndctl,v2,5/5] cxl/test: add cxl-poison.sh unit test

Message ID 51fdd212d139d203506cc2ee18abb362e5859e3e.1696196382.git.alison.schofield@intel.com (mailing list archive)
State Superseded
Delegated to: Vishal Verma
Headers show
Series Support poison list retrieval | expand

Commit Message

Alison Schofield Oct. 1, 2023, 10:31 p.m. UTC
From: Alison Schofield <alison.schofield@intel.com>

Exercise cxl list, libcxl, and driver pieces of the get poison list
pathway. Inject and clear poison using debugfs and use cxl-cli to
read the poison list by memdev and by region.

Signed-off-by: Alison Schofield <alison.schofield@intel.com>
---
 test/cxl-poison.sh | 103 +++++++++++++++++++++++++++++++++++++++++++++
 test/meson.build   |   2 +
 2 files changed, 105 insertions(+)
 create mode 100644 test/cxl-poison.sh

Comments

Verma, Vishal L Nov. 15, 2023, 10:13 a.m. UTC | #1
On Sun, 2023-10-01 at 15:31 -0700, alison.schofield@intel.com wrote:
> From: Alison Schofield <alison.schofield@intel.com>
> 
> Exercise cxl list, libcxl, and driver pieces of the get poison list
> pathway. Inject and clear poison using debugfs and use cxl-cli to
> read the poison list by memdev and by region.
> 
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> ---
>  test/cxl-poison.sh | 103 +++++++++++++++++++++++++++++++++++++++++++++
>  test/meson.build   |   2 +
>  2 files changed, 105 insertions(+)
>  create mode 100644 test/cxl-poison.sh
> 
> diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh
> new file mode 100644
> index 000000000000..3c424532da7b
> --- /dev/null
> +++ b/test/cxl-poison.sh
> @@ -0,0 +1,103 @@
> +#!/bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (C) 2022 Intel Corporation. All rights reserved.
> +
> +. $(dirname $0)/common
> +
> +rc=77
> +
> +set -ex
> +
> +trap 'err $LINENO' ERR
> +
> +check_prereq "jq"
> +
> +modprobe -r cxl_test
> +modprobe cxl_test
> +cxl list

"$CXL" list

Also should reset rc from 77 so that it doesn't show as skipped on a
real failure.

> +
> +# THEORY OF OPERATION: Exercise cxl-cli and cxl driver ability to
> +# inject, clear, and get the poison list. Do it by memdev and by region.
> +# Based on current cxl-test topology.
> +
> +create_region()
> +{
> +       region=$($CXL create-region -d $decoder -m $memdevs | jq -r ".region")
> +
> +       if [[ ! $region ]]; then
> +               echo "create-region failed for $decoder"
> +               err "$LINENO"
> +       fi
> +}
> +
> +setup_x2_region()
> +{
> +        # Find an x2 decoder
> +        decoder=$($CXL list -b cxl_test -D -d root | jq -r ".[] |

I suspect this comes from another test, but test/common defines a
$cxl_test_bus that can be used here.

> +          select(.pmem_capable == true) |
> +          select(.nr_targets == 2) |
> +          .decoder")
> +
> +        # Find a memdev for each host-bridge interleave position
> +        port_dev0=$($CXL list -T -d $decoder | jq -r ".[] |
> +            .targets | .[] | select(.position == 0) | .target")
> +        port_dev1=$($CXL list -T -d $decoder | jq -r ".[] |
> +            .targets | .[] | select(.position == 1) | .target")
> +        mem0=$($CXL list -M -p $port_dev0 | jq -r ".[0].memdev")
> +        mem1=$($CXL list -M -p $port_dev1 | jq -r ".[0].memdev")
> +        memdevs="$mem0 $mem1"
> +}
> +
> +find_media_errors()
> +{
> +       nr=$(echo $json | jq -r ".nr_poison_records")

No need for echo and pipe - 

  nr="$(jq -r ".nr_poison_records" <<< "$json")"

Also, this currently assumes that a global '$json' will be available
and up to date. In this test the way it is called, this will always be
true, but it would be cleaner to actually pass $json to
find_media_errors() each time, and in here, do something like 

  local json="$1"

> +       if [[ $nr -ne $NR_ERRS ]]; then

If using the bash variant, [[ ]], this should be

  if [[ $nr != $NR_ERRS ]]; then

> +               echo "$mem: $NR_ERRS poison records expected, $nr found"
> +               err "$LINENO"
> +       fi
> +}
> +
> +# Turn Tracing ON
> +# Note that 'cxl list --poison' does toggle the tracing, so
> +# turning it on here is to enable the test user to view inject
> +# and clear trace events, if they wish.
> +echo 1 > /sys/kernel/tracing/events/cxl/cxl_poison/enable
> +
> +# Using DEBUGFS:
> +# When cxl-cli support for inject and clear arrives, replace
> +# the writes to /sys/kernel/debug with the new cxl commands
> +# that wrap them.
> +
> +# Poison by memdev: inject, list, clear, list.
> +# Inject 2 into pmem and 2 into ram partition.
> +echo 0x40000000 > /sys/kernel/debug/cxl/mem1/inject_poison
> +echo 0x40001000 > /sys/kernel/debug/cxl/mem1/inject_poison
> +echo 0x0       > /sys/kernel/debug/cxl/mem1/inject_poison
> +echo 0x600     > /sys/kernel/debug/cxl/mem1/inject_poison
> +NR_ERRS=4
> +json=$("$CXL" list -m mem1 --poison | jq -r '.[].poison')
> +find_media_errors
> +echo 0x40000000 > /sys/kernel/debug/cxl/mem1/clear_poison
> +echo 0x40001000 > /sys/kernel/debug/cxl/mem1/clear_poison
> +echo 0x0       > /sys/kernel/debug/cxl/mem1/clear_poison
> +echo 0x600     > /sys/kernel/debug/cxl/mem1/clear_poison
> +NR_ERRS=0
> +json=$("$CXL" list -m mem1 --poison | jq -r '.[].poison')
> +find_media_errors

For all of the above debugfs writes -

mem1 is hard-coded - is this supposed to be "$mem1" from when
setup_x2_region() was done (similar to how the region stuff is done
below)?

> +
> +# Poison by region: inject, list, clear, list.
> +setup_x2_region
> +create_region
> +echo 0x40000000 > /sys/kernel/debug/cxl/"$mem0"/inject_poison
> +echo 0x40000000 > /sys/kernel/debug/cxl/"$mem1"/inject_poison
> +NR_ERRS=2
> +json=$("$CXL" list -r "$region" --poison | jq -r '.[].poison')
> +find_media_errors
> +echo 0x40000000 > /sys/kernel/debug/cxl/"$mem0"/clear_poison
> +echo 0x40000000 > /sys/kernel/debug/cxl/"$mem1"/clear_poison

It might be nice to create a couple of helpers -

  inject_poison_sysfs() {
    memdev="$1"
    addr="$2"
    ...
  }

And similarly

  clear_poison_sysfs()...

> +NR_ERRS=0
> +json=$("$CXL" list -r "$region" --poison | jq -r '.[].poison')
> +find_media_errors
> +
> +check_dmesg "$LINENO"
> +modprobe -r cxl-test
> diff --git a/test/meson.build b/test/meson.build
> index 224adaf41fcc..2706fa5d633c 100644
> --- a/test/meson.build
> +++ b/test/meson.build
> @@ -157,6 +157,7 @@ cxl_create_region = find_program('cxl-create-region.sh')
>  cxl_xor_region = find_program('cxl-xor-region.sh')
>  cxl_update_firmware = find_program('cxl-update-firmware.sh')
>  cxl_events = find_program('cxl-events.sh')
> +cxl_poison = find_program('cxl-poison.sh')
>  
>  tests = [
>    [ 'libndctl',               libndctl,                  'ndctl' ],
> @@ -186,6 +187,7 @@ tests = [
>    [ 'cxl-create-region.sh',   cxl_create_region,  'cxl'   ],
>    [ 'cxl-xor-region.sh',      cxl_xor_region,     'cxl'   ],
>    [ 'cxl-events.sh',          cxl_events,         'cxl'   ],
> +  [ 'cxl-poison.sh',          cxl_poison,         'cxl'   ],
>  ]
>  
>  if get_option('destructive').enabled()
Alison Schofield Nov. 17, 2023, 4:52 p.m. UTC | #2
On Wed, Nov 15, 2023 at 02:13:48AM -0800, Vishal Verma wrote:
> On Sun, 2023-10-01 at 15:31 -0700, alison.schofield@intel.com wrote:
> > From: Alison Schofield <alison.schofield@intel.com>
> >
> > Exercise cxl list, libcxl, and driver pieces of the get poison list
> > pathway. Inject and clear poison using debugfs and use cxl-cli to
> > read the poison list by memdev and by region.
> >
> > Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> > ---

snip

> > +cxl list
> 
> "$CXL" list
> 
> Also should reset rc from 77 so that it doesn't show as skipped on a
> real failure.

Done.

>
snip

> > +setup_x2_region()
> > +{
> > +        # Find an x2 decoder
> > +        decoder=$($CXL list -b cxl_test -D -d root | jq -r ".[] |
> 
> I suspect this comes from another test, but test/common defines a
> $cxl_test_bus that can be used here.

Done.

> 
snip

> > +find_media_errors()
> > +{
> > +       nr=$(echo $json | jq -r ".nr_poison_records")
> 
> No need for echo and pipe -
> 
>   nr="$(jq -r ".nr_poison_records" <<< "$json")"

Done

> 
> Also, this currently assumes that a global '$json' will be available
> and up to date. In this test the way it is called, this will always be
> true, but it would be cleaner to actually pass $json to
> find_media_errors() each time, and in here, do something like
> 
>   local json="$1"
> 

Done

> > +       if [[ $nr -ne $NR_ERRS ]]; then
> 
> If using the bash variant, [[ ]], this should be
> 
>   if [[ $nr != $NR_ERRS ]]; then
> 

Done

> > +               echo "$mem: $NR_ERRS poison records expected, $nr found"
> > +               err "$LINENO"
> > +       fi
> > +}
> > +
snip

> > +find_media_errors
> 
> For all of the above debugfs writes -
> 
> mem1 is hard-coded - is this supposed to be "$mem1" from when
> setup_x2_region() was done (similar to how the region stuff is done
> below)?

It was intentionally hardcoded based on what I expect in the
cxl-test topology. 

Changed it in v3 to look up a memdev.

> 
> > +
> > +# Poison by region: inject, list, clear, list.
> > +setup_x2_region
> > +create_region
> > +echo 0x40000000 > /sys/kernel/debug/cxl/"$mem0"/inject_poison
> > +echo 0x40000000 > /sys/kernel/debug/cxl/"$mem1"/inject_poison
> > +NR_ERRS=2
> > +json=$("$CXL" list -r "$region" --poison | jq -r '.[].poison')
> > +find_media_errors
> > +echo 0x40000000 > /sys/kernel/debug/cxl/"$mem0"/clear_poison
> > +echo 0x40000000 > /sys/kernel/debug/cxl/"$mem1"/clear_poison
> 
> It might be nice to create a couple of helpers -
> 
>   inject_poison_sysfs() {
>     memdev="$1"
>     addr="$2"
>     ...
>   }
> 
> And similarly
> 
>   clear_poison_sysfs()...
>

Done

Thanks for the review Vishal, especially the bash & jq wisdom!
> >
diff mbox series

Patch

diff --git a/test/cxl-poison.sh b/test/cxl-poison.sh
new file mode 100644
index 000000000000..3c424532da7b
--- /dev/null
+++ b/test/cxl-poison.sh
@@ -0,0 +1,103 @@ 
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2022 Intel Corporation. All rights reserved.
+
+. $(dirname $0)/common
+
+rc=77
+
+set -ex
+
+trap 'err $LINENO' ERR
+
+check_prereq "jq"
+
+modprobe -r cxl_test
+modprobe cxl_test
+cxl list
+
+# THEORY OF OPERATION: Exercise cxl-cli and cxl driver ability to
+# inject, clear, and get the poison list. Do it by memdev and by region.
+# Based on current cxl-test topology.
+
+create_region()
+{
+	region=$($CXL create-region -d $decoder -m $memdevs | jq -r ".region")
+
+	if [[ ! $region ]]; then
+		echo "create-region failed for $decoder"
+		err "$LINENO"
+	fi
+}
+
+setup_x2_region()
+{
+        # Find an x2 decoder
+        decoder=$($CXL list -b cxl_test -D -d root | jq -r ".[] |
+          select(.pmem_capable == true) |
+          select(.nr_targets == 2) |
+          .decoder")
+
+        # Find a memdev for each host-bridge interleave position
+        port_dev0=$($CXL list -T -d $decoder | jq -r ".[] |
+            .targets | .[] | select(.position == 0) | .target")
+        port_dev1=$($CXL list -T -d $decoder | jq -r ".[] |
+            .targets | .[] | select(.position == 1) | .target")
+        mem0=$($CXL list -M -p $port_dev0 | jq -r ".[0].memdev")
+        mem1=$($CXL list -M -p $port_dev1 | jq -r ".[0].memdev")
+        memdevs="$mem0 $mem1"
+}
+
+find_media_errors()
+{
+	nr=$(echo $json | jq -r ".nr_poison_records")
+	if [[ $nr -ne $NR_ERRS ]]; then
+		echo "$mem: $NR_ERRS poison records expected, $nr found"
+		err "$LINENO"
+	fi
+}
+
+# Turn Tracing ON
+# Note that 'cxl list --poison' does toggle the tracing, so
+# turning it on here is to enable the test user to view inject
+# and clear trace events, if they wish.
+echo 1 > /sys/kernel/tracing/events/cxl/cxl_poison/enable
+
+# Using DEBUGFS:
+# When cxl-cli support for inject and clear arrives, replace
+# the writes to /sys/kernel/debug with the new cxl commands
+# that wrap them.
+
+# Poison by memdev: inject, list, clear, list.
+# Inject 2 into pmem and 2 into ram partition.
+echo 0x40000000 > /sys/kernel/debug/cxl/mem1/inject_poison
+echo 0x40001000 > /sys/kernel/debug/cxl/mem1/inject_poison
+echo 0x0 	> /sys/kernel/debug/cxl/mem1/inject_poison
+echo 0x600	> /sys/kernel/debug/cxl/mem1/inject_poison
+NR_ERRS=4
+json=$("$CXL" list -m mem1 --poison | jq -r '.[].poison')
+find_media_errors
+echo 0x40000000 > /sys/kernel/debug/cxl/mem1/clear_poison
+echo 0x40001000 > /sys/kernel/debug/cxl/mem1/clear_poison
+echo 0x0 	> /sys/kernel/debug/cxl/mem1/clear_poison
+echo 0x600	> /sys/kernel/debug/cxl/mem1/clear_poison
+NR_ERRS=0
+json=$("$CXL" list -m mem1 --poison | jq -r '.[].poison')
+find_media_errors
+
+# Poison by region: inject, list, clear, list.
+setup_x2_region
+create_region
+echo 0x40000000 > /sys/kernel/debug/cxl/"$mem0"/inject_poison
+echo 0x40000000 > /sys/kernel/debug/cxl/"$mem1"/inject_poison
+NR_ERRS=2
+json=$("$CXL" list -r "$region" --poison | jq -r '.[].poison')
+find_media_errors
+echo 0x40000000 > /sys/kernel/debug/cxl/"$mem0"/clear_poison
+echo 0x40000000 > /sys/kernel/debug/cxl/"$mem1"/clear_poison
+NR_ERRS=0
+json=$("$CXL" list -r "$region" --poison | jq -r '.[].poison')
+find_media_errors
+
+check_dmesg "$LINENO"
+modprobe -r cxl-test
diff --git a/test/meson.build b/test/meson.build
index 224adaf41fcc..2706fa5d633c 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -157,6 +157,7 @@  cxl_create_region = find_program('cxl-create-region.sh')
 cxl_xor_region = find_program('cxl-xor-region.sh')
 cxl_update_firmware = find_program('cxl-update-firmware.sh')
 cxl_events = find_program('cxl-events.sh')
+cxl_poison = find_program('cxl-poison.sh')
 
 tests = [
   [ 'libndctl',               libndctl,		  'ndctl' ],
@@ -186,6 +187,7 @@  tests = [
   [ 'cxl-create-region.sh',   cxl_create_region,  'cxl'   ],
   [ 'cxl-xor-region.sh',      cxl_xor_region,     'cxl'   ],
   [ 'cxl-events.sh',          cxl_events,         'cxl'   ],
+  [ 'cxl-poison.sh',          cxl_poison,         'cxl'   ],
 ]
 
 if get_option('destructive').enabled()