diff mbox series

[ndctl,1/5] libcxl: add interfaces for GET_POISON_LIST mailbox commands

Message ID 73b2edf5ded979cb3164bcf2b76c4f300cdf2250.1668133294.git.alison.schofield@intel.com
State New, archived
Headers show
Series Support poison list retrieval | expand

Commit Message

Alison Schofield Nov. 11, 2022, 3:20 a.m. UTC
From: Alison Schofield <alison.schofield@intel.com>

CXL devices maintain a list of locations that are poisoned or result
in poison if the addresses are accessed by the host.

Per the spec (CXL 3.0 8.2.9.8.4.1), the device returns this Poison
list as a set of  Media Error Records that include the source of the
error, the starting device physical address and length.

Trigger the retrieval of the poison list by writing to the device
sysfs attribute: trigger_poison_list.

Retrieval is offered by memdev or by region:
int cxl_memdev_trigger_poison_list(struct cxl_memdev *memdev);
int cxl_region_trigger_poison_list(struct cxl_region *region);

This interface triggers the retrieval of the poison list from the
devices and logs the error records as kernel trace events named
'cxl_poison'.

Signed-off-by: Alison Schofield <alison.schofield@intel.com>
---
 cxl/lib/libcxl.c   | 44 ++++++++++++++++++++++++++++++++++++++++++++
 cxl/lib/libcxl.sym |  6 ++++++
 cxl/libcxl.h       |  2 ++
 3 files changed, 52 insertions(+)

Comments

Jonathan Cameron Nov. 16, 2022, 12:56 p.m. UTC | #1
On Thu, 10 Nov 2022 19:20:04 -0800
alison.schofield@intel.com wrote:

> From: Alison Schofield <alison.schofield@intel.com>
> 
> CXL devices maintain a list of locations that are poisoned or result
> in poison if the addresses are accessed by the host.
> 
> Per the spec (CXL 3.0 8.2.9.8.4.1), the device returns this Poison
> list as a set of  Media Error Records that include the source of the
> error, the starting device physical address and length.
> 
> Trigger the retrieval of the poison list by writing to the device
> sysfs attribute: trigger_poison_list.
> 
> Retrieval is offered by memdev or by region:
> int cxl_memdev_trigger_poison_list(struct cxl_memdev *memdev);
> int cxl_region_trigger_poison_list(struct cxl_region *region);
> 
> This interface triggers the retrieval of the poison list from the
> devices and logs the error records as kernel trace events named
> 'cxl_poison'.
> 
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Trivial comment inline + I haven't been tracking closely development
of this tool closely so hopefully this will get other eyes on it who
are more familiar.  With that in mind:

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

> ---
>  cxl/lib/libcxl.c   | 44 ++++++++++++++++++++++++++++++++++++++++++++
>  cxl/lib/libcxl.sym |  6 ++++++
>  cxl/libcxl.h       |  2 ++
>  3 files changed, 52 insertions(+)
> 
> diff --git a/cxl/lib/libcxl.c b/cxl/lib/libcxl.c
> index e8c5d4444dd0..1a8a8eb0ffcb 100644
> --- a/cxl/lib/libcxl.c
> +++ b/cxl/lib/libcxl.c
> @@ -1331,6 +1331,50 @@ CXL_EXPORT int cxl_memdev_disable_invalidate(struct cxl_memdev *memdev)
>  	return 0;
>  }
>  
> +CXL_EXPORT int cxl_memdev_trigger_poison_list(struct cxl_memdev *memdev)
> +{
> +	struct cxl_ctx *ctx = cxl_memdev_get_ctx(memdev);
> +	char *path = memdev->dev_buf;
> +	int len = memdev->buf_len, rc;
> +
> +	if (snprintf(path, len, "%s/trigger_poison_list", memdev->dev_path) >=
> +	    len) {

Ugly line break choice to break mid argument..
	if (snprintf(path, len, "%s/trigger_poison_list",
		     memdev->dev_path) >= len) {
would be better.

> +		err(ctx, "%s: buffer too small\n",
> +		    cxl_memdev_get_devname(memdev));
> +		return -ENXIO;
> +	}
> +	rc = sysfs_write_attr(ctx, path, "1\n");
> +	if (rc < 0) {
> +		fprintf(stderr,
> +			"%s: Failed write sysfs attr trigger_poison_list\n",
> +			cxl_memdev_get_devname(memdev));
> +		return rc;
> +	}
> +	return 0;
> +}
> +
> +CXL_EXPORT int cxl_region_trigger_poison_list(struct cxl_region *region)
> +{
> +	struct cxl_ctx *ctx = cxl_region_get_ctx(region);
> +	char *path = region->dev_buf;
> +	int len = region->buf_len, rc;
> +
> +	if (snprintf(path, len, "%s/trigger_poison_list", region->dev_path) >=
> +	    len) {
as above.

> +		err(ctx, "%s: buffer too small\n",
> +		    cxl_region_get_devname(region));
> +		return -ENXIO;
> +	}
> +	rc = sysfs_write_attr(ctx, path, "1\n");
> +	if (rc < 0) {
> +		fprintf(stderr,
> +			"%s: Failed write sysfs attr trigger_poison_list\n",
> +			cxl_region_get_devname(region));
> +		return rc;
> +	}
> +	return 0;
> +}
> +
>  CXL_EXPORT int cxl_memdev_enable(struct cxl_memdev *memdev)
>  {
>  	struct cxl_ctx *ctx = cxl_memdev_get_ctx(memdev);
> diff --git a/cxl/lib/libcxl.sym b/cxl/lib/libcxl.sym
> index 8bb91e05638b..ecf98e6c7af2 100644
> --- a/cxl/lib/libcxl.sym
> +++ b/cxl/lib/libcxl.sym
> @@ -217,3 +217,9 @@ global:
>  	cxl_decoder_get_max_available_extent;
>  	cxl_decoder_get_region;
>  } LIBCXL_2;
> +
> +LIBCXL_4 {
> +global:
> +	cxl_memdev_trigger_poison_list;
> +	cxl_region_trigger_poison_list;
> +} LIBCXL_3;
> diff --git a/cxl/libcxl.h b/cxl/libcxl.h
> index 9fe4e99263dd..5ebdf0879325 100644
> --- a/cxl/libcxl.h
> +++ b/cxl/libcxl.h
> @@ -375,6 +375,8 @@ enum cxl_setpartition_mode {
>  
>  int cxl_cmd_partition_set_mode(struct cxl_cmd *cmd,
>  		enum cxl_setpartition_mode mode);
> +int cxl_memdev_trigger_poison_list(struct cxl_memdev *memdev);
> +int cxl_region_trigger_poison_list(struct cxl_region *region);
>  
>  #ifdef __cplusplus
>  } /* extern "C" */
Alison Schofield Nov. 17, 2022, 11:45 p.m. UTC | #2
On Wed, Nov 16, 2022 at 12:56:40PM +0000, Jonathan Cameron wrote:
> On Thu, 10 Nov 2022 19:20:04 -0800
> alison.schofield@intel.com wrote:
> 
> > From: Alison Schofield <alison.schofield@intel.com>
> > 
> > CXL devices maintain a list of locations that are poisoned or result
> > in poison if the addresses are accessed by the host.
> > 
> > Per the spec (CXL 3.0 8.2.9.8.4.1), the device returns this Poison
> > list as a set of  Media Error Records that include the source of the
> > error, the starting device physical address and length.
> > 
> > Trigger the retrieval of the poison list by writing to the device
> > sysfs attribute: trigger_poison_list.
> > 
> > Retrieval is offered by memdev or by region:
> > int cxl_memdev_trigger_poison_list(struct cxl_memdev *memdev);
> > int cxl_region_trigger_poison_list(struct cxl_region *region);
> > 
> > This interface triggers the retrieval of the poison list from the
> > devices and logs the error records as kernel trace events named
> > 'cxl_poison'.
> > 
> > Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> Trivial comment inline + I haven't been tracking closely development
> of this tool closely so hopefully this will get other eyes on it who
> are more familiar.  With that in mind:
> 
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

Thanks Jonathan!
I cleaned up the ugly line breaks and I'll carry your Reviewed-by
forward.

> 
> > ---
> >  cxl/lib/libcxl.c   | 44 ++++++++++++++++++++++++++++++++++++++++++++
> >  cxl/lib/libcxl.sym |  6 ++++++
> >  cxl/libcxl.h       |  2 ++
> >  3 files changed, 52 insertions(+)
> > 
> > diff --git a/cxl/lib/libcxl.c b/cxl/lib/libcxl.c
> > index e8c5d4444dd0..1a8a8eb0ffcb 100644
> > --- a/cxl/lib/libcxl.c
> > +++ b/cxl/lib/libcxl.c
> > @@ -1331,6 +1331,50 @@ CXL_EXPORT int cxl_memdev_disable_invalidate(struct cxl_memdev *memdev)
> >  	return 0;
> >  }
> >  
> > +CXL_EXPORT int cxl_memdev_trigger_poison_list(struct cxl_memdev *memdev)
> > +{
> > +	struct cxl_ctx *ctx = cxl_memdev_get_ctx(memdev);
> > +	char *path = memdev->dev_buf;
> > +	int len = memdev->buf_len, rc;
> > +
> > +	if (snprintf(path, len, "%s/trigger_poison_list", memdev->dev_path) >=
> > +	    len) {
> 
> Ugly line break choice to break mid argument..
> 	if (snprintf(path, len, "%s/trigger_poison_list",
> 		     memdev->dev_path) >= len) {
> would be better.
> 
> > +		err(ctx, "%s: buffer too small\n",
> > +		    cxl_memdev_get_devname(memdev));
> > +		return -ENXIO;
> > +	}
> > +	rc = sysfs_write_attr(ctx, path, "1\n");
> > +	if (rc < 0) {
> > +		fprintf(stderr,
> > +			"%s: Failed write sysfs attr trigger_poison_list\n",
> > +			cxl_memdev_get_devname(memdev));
> > +		return rc;
> > +	}
> > +	return 0;
> > +}
> > +
> > +CXL_EXPORT int cxl_region_trigger_poison_list(struct cxl_region *region)
> > +{
> > +	struct cxl_ctx *ctx = cxl_region_get_ctx(region);
> > +	char *path = region->dev_buf;
> > +	int len = region->buf_len, rc;
> > +
> > +	if (snprintf(path, len, "%s/trigger_poison_list", region->dev_path) >=
> > +	    len) {
> as above.
> 
> > +		err(ctx, "%s: buffer too small\n",
> > +		    cxl_region_get_devname(region));
> > +		return -ENXIO;
> > +	}
> > +	rc = sysfs_write_attr(ctx, path, "1\n");
> > +	if (rc < 0) {
> > +		fprintf(stderr,
> > +			"%s: Failed write sysfs attr trigger_poison_list\n",
> > +			cxl_region_get_devname(region));
> > +		return rc;
> > +	}
> > +	return 0;
> > +}
> > +
> >  CXL_EXPORT int cxl_memdev_enable(struct cxl_memdev *memdev)
> >  {
> >  	struct cxl_ctx *ctx = cxl_memdev_get_ctx(memdev);
> > diff --git a/cxl/lib/libcxl.sym b/cxl/lib/libcxl.sym
> > index 8bb91e05638b..ecf98e6c7af2 100644
> > --- a/cxl/lib/libcxl.sym
> > +++ b/cxl/lib/libcxl.sym
> > @@ -217,3 +217,9 @@ global:
> >  	cxl_decoder_get_max_available_extent;
> >  	cxl_decoder_get_region;
> >  } LIBCXL_2;
> > +
> > +LIBCXL_4 {
> > +global:
> > +	cxl_memdev_trigger_poison_list;
> > +	cxl_region_trigger_poison_list;
> > +} LIBCXL_3;
> > diff --git a/cxl/libcxl.h b/cxl/libcxl.h
> > index 9fe4e99263dd..5ebdf0879325 100644
> > --- a/cxl/libcxl.h
> > +++ b/cxl/libcxl.h
> > @@ -375,6 +375,8 @@ enum cxl_setpartition_mode {
> >  
> >  int cxl_cmd_partition_set_mode(struct cxl_cmd *cmd,
> >  		enum cxl_setpartition_mode mode);
> > +int cxl_memdev_trigger_poison_list(struct cxl_memdev *memdev);
> > +int cxl_region_trigger_poison_list(struct cxl_region *region);
> >  
> >  #ifdef __cplusplus
> >  } /* extern "C" */
>
diff mbox series

Patch

diff --git a/cxl/lib/libcxl.c b/cxl/lib/libcxl.c
index e8c5d4444dd0..1a8a8eb0ffcb 100644
--- a/cxl/lib/libcxl.c
+++ b/cxl/lib/libcxl.c
@@ -1331,6 +1331,50 @@  CXL_EXPORT int cxl_memdev_disable_invalidate(struct cxl_memdev *memdev)
 	return 0;
 }
 
+CXL_EXPORT int cxl_memdev_trigger_poison_list(struct cxl_memdev *memdev)
+{
+	struct cxl_ctx *ctx = cxl_memdev_get_ctx(memdev);
+	char *path = memdev->dev_buf;
+	int len = memdev->buf_len, rc;
+
+	if (snprintf(path, len, "%s/trigger_poison_list", memdev->dev_path) >=
+	    len) {
+		err(ctx, "%s: buffer too small\n",
+		    cxl_memdev_get_devname(memdev));
+		return -ENXIO;
+	}
+	rc = sysfs_write_attr(ctx, path, "1\n");
+	if (rc < 0) {
+		fprintf(stderr,
+			"%s: Failed write sysfs attr trigger_poison_list\n",
+			cxl_memdev_get_devname(memdev));
+		return rc;
+	}
+	return 0;
+}
+
+CXL_EXPORT int cxl_region_trigger_poison_list(struct cxl_region *region)
+{
+	struct cxl_ctx *ctx = cxl_region_get_ctx(region);
+	char *path = region->dev_buf;
+	int len = region->buf_len, rc;
+
+	if (snprintf(path, len, "%s/trigger_poison_list", region->dev_path) >=
+	    len) {
+		err(ctx, "%s: buffer too small\n",
+		    cxl_region_get_devname(region));
+		return -ENXIO;
+	}
+	rc = sysfs_write_attr(ctx, path, "1\n");
+	if (rc < 0) {
+		fprintf(stderr,
+			"%s: Failed write sysfs attr trigger_poison_list\n",
+			cxl_region_get_devname(region));
+		return rc;
+	}
+	return 0;
+}
+
 CXL_EXPORT int cxl_memdev_enable(struct cxl_memdev *memdev)
 {
 	struct cxl_ctx *ctx = cxl_memdev_get_ctx(memdev);
diff --git a/cxl/lib/libcxl.sym b/cxl/lib/libcxl.sym
index 8bb91e05638b..ecf98e6c7af2 100644
--- a/cxl/lib/libcxl.sym
+++ b/cxl/lib/libcxl.sym
@@ -217,3 +217,9 @@  global:
 	cxl_decoder_get_max_available_extent;
 	cxl_decoder_get_region;
 } LIBCXL_2;
+
+LIBCXL_4 {
+global:
+	cxl_memdev_trigger_poison_list;
+	cxl_region_trigger_poison_list;
+} LIBCXL_3;
diff --git a/cxl/libcxl.h b/cxl/libcxl.h
index 9fe4e99263dd..5ebdf0879325 100644
--- a/cxl/libcxl.h
+++ b/cxl/libcxl.h
@@ -375,6 +375,8 @@  enum cxl_setpartition_mode {
 
 int cxl_cmd_partition_set_mode(struct cxl_cmd *cmd,
 		enum cxl_setpartition_mode mode);
+int cxl_memdev_trigger_poison_list(struct cxl_memdev *memdev);
+int cxl_region_trigger_poison_list(struct cxl_region *region);
 
 #ifdef __cplusplus
 } /* extern "C" */