diff mbox series

[1/3] trace, cxl: Introduce a TRACE_EVENT for CXL Poison Records

Message ID 32a761fe7046680a4d50762fc43988def24a4bcd.1655250669.git.alison.schofield@intel.com
State New, archived
Headers show
Series CXL Poison List Retrieval & Tracing | expand

Commit Message

Alison Schofield June 15, 2022, 12:10 a.m. UTC
From: Alison Schofield <alison.schofield@intel.com>

Add a trace event for CXL Poison List Media Error Records that
includes the starting DPA of the poison, the length, and the
the source of the poison.

This trace event will be used by the CXL_MEM driver to log the
Media Errors returned by the GET_POISON_LIST Mailbox command.

Signed-off-by: Alison Schofield <alison.schofield@intel.com>
---
 include/trace/events/cxl.h | 60 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 include/trace/events/cxl.h

Comments

Steven Rostedt June 15, 2022, 1:15 a.m. UTC | #1
On Tue, 14 Jun 2022 17:10:26 -0700
alison.schofield@intel.com wrote:

> From: Alison Schofield <alison.schofield@intel.com>
> 
> Add a trace event for CXL Poison List Media Error Records that
> includes the starting DPA of the poison, the length, and the
> the source of the poison.
> 
> This trace event will be used by the CXL_MEM driver to log the
> Media Errors returned by the GET_POISON_LIST Mailbox command.
> 
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> ---
>  include/trace/events/cxl.h | 60 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 60 insertions(+)
>  create mode 100644 include/trace/events/cxl.h
> 
> diff --git a/include/trace/events/cxl.h b/include/trace/events/cxl.h
> new file mode 100644
> index 000000000000..17e707c3817e
> --- /dev/null
> +++ b/include/trace/events/cxl.h
> @@ -0,0 +1,60 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM cxl
> +
> +#if !defined(_CXL_TRACE_H) ||  defined(TRACE_HEADER_MULTI_READ)
> +#define _CXL_TRACE_H
> +
> +#include <linux/tracepoint.h>
> +
> +TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_UNKNOWN);
> +TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_INTERNAL);
> +TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_EXTERNAL);
> +TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_INJECTED);
> +TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_VENDOR);
> +TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_INVALID);
> +
> +#define show_poison_source(source)					\
> +	__print_symbolic(source,					\
> +			{CXL_POISON_SOURCE_UNKNOWN,  "UNKNOWN"},	\
> +			{CXL_POISON_SOURCE_EXTERNAL, "EXTERNAL"},	\
> +			{CXL_POISON_SOURCE_INTERNAL, "INTERNAL"},	\
> +			{CXL_POISON_SOURCE_INJECTED, "INJECTED"},	\
> +			{CXL_POISON_SOURCE_VENDOR,   "VENDOR"},		\
> +			{CXL_POISON_SOURCE_INVALID,  "INVALID"})
> +
> +TRACE_EVENT(cxl_poison_list,
> +
> +	    TP_PROTO(struct device *dev,
> +		     int source,
> +		     unsigned long start,
> +		     unsigned int length),
> +
> +	    TP_ARGS(dev, source, start, length),
> +
> +	    TP_STRUCT__entry(
> +		__string(name, dev_name(dev))
> +		__field(int, source)
> +		__field(u64, start)
> +		__field(u32, length)

OK, the above order should be fine, without adding any holes. The
__string() is 4 bytes as well as the "int". Which keeps it aligned with the
u64 (8 bytes), followed by a u32, which is 4 bytes.

From a tracing perspective:

Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>

-- Steve


> +	    ),
> +
> +	    TP_fast_assign(
> +		__assign_str(name, dev_name(dev));
> +		__entry->source = source;
> +		__entry->start = start;
> +		__entry->length = length;
> +	    ),
> +
> +	    TP_printk("dev %s source %s start %llu length %u",
> +		__get_str(name),
> +		show_poison_source(__entry->source),
> +		__entry->start,
> +		__entry->length)
> +);
> +#endif /* _CXL_TRACE_H */
> +
> +/* This part must be outside protection */
> +#undef TRACE_INCLUDE_FILE
> +#define TRACE_INCLUDE_FILE cxl
> +#include <trace/define_trace.h>
Davidlohr Bueso June 16, 2022, 7:45 p.m. UTC | #2
On Tue, 14 Jun 2022, alison.schofield@intel.com wrote:

>From: Alison Schofield <alison.schofield@intel.com>
>
>Add a trace event for CXL Poison List Media Error Records that
>includes the starting DPA of the poison, the length, and the
>the source of the poison.
>
>This trace event will be used by the CXL_MEM driver to log the
>Media Errors returned by the GET_POISON_LIST Mailbox command.
>
>Signed-off-by: Alison Schofield <alison.schofield@intel.com>
>Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>

Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>

>---
> include/trace/events/cxl.h | 60 ++++++++++++++++++++++++++++++++++++++
> 1 file changed, 60 insertions(+)
> create mode 100644 include/trace/events/cxl.h
>
>diff --git a/include/trace/events/cxl.h b/include/trace/events/cxl.h
>new file mode 100644
>index 000000000000..17e707c3817e
>--- /dev/null
>+++ b/include/trace/events/cxl.h
>@@ -0,0 +1,60 @@
>+/* SPDX-License-Identifier: GPL-2.0 */
>+#undef TRACE_SYSTEM
>+#define TRACE_SYSTEM cxl
>+
>+#if !defined(_CXL_TRACE_H) ||  defined(TRACE_HEADER_MULTI_READ)
>+#define _CXL_TRACE_H
>+
>+#include <linux/tracepoint.h>
>+
>+TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_UNKNOWN);
>+TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_INTERNAL);
>+TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_EXTERNAL);
>+TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_INJECTED);
>+TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_VENDOR);
>+TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_INVALID);
>+
>+#define show_poison_source(source)					\
>+	__print_symbolic(source,					\
>+			{CXL_POISON_SOURCE_UNKNOWN,  "UNKNOWN"},	\
>+			{CXL_POISON_SOURCE_EXTERNAL, "EXTERNAL"},	\
>+			{CXL_POISON_SOURCE_INTERNAL, "INTERNAL"},	\
>+			{CXL_POISON_SOURCE_INJECTED, "INJECTED"},	\
>+			{CXL_POISON_SOURCE_VENDOR,   "VENDOR"},		\
>+			{CXL_POISON_SOURCE_INVALID,  "INVALID"})
>+
>+TRACE_EVENT(cxl_poison_list,
>+
>+	    TP_PROTO(struct device *dev,
>+		     int source,
>+		     unsigned long start,
>+		     unsigned int length),
>+
>+	    TP_ARGS(dev, source, start, length),
>+
>+	    TP_STRUCT__entry(
>+		__string(name, dev_name(dev))
>+		__field(int, source)
>+		__field(u64, start)
>+		__field(u32, length)
>+	    ),
>+
>+	    TP_fast_assign(
>+		__assign_str(name, dev_name(dev));
>+		__entry->source = source;
>+		__entry->start = start;
>+		__entry->length = length;
>+	    ),
>+
>+	    TP_printk("dev %s source %s start %llu length %u",
>+		__get_str(name),
>+		show_poison_source(__entry->source),
>+		__entry->start,
>+		__entry->length)
>+);
>+#endif /* _CXL_TRACE_H */
>+
>+/* This part must be outside protection */
>+#undef TRACE_INCLUDE_FILE
>+#define TRACE_INCLUDE_FILE cxl
>+#include <trace/define_trace.h>
>
>--
>2.31.1
>
Jonathan Cameron June 17, 2022, 4:17 p.m. UTC | #3
On Tue, 14 Jun 2022 17:10:26 -0700
alison.schofield@intel.com wrote:

> From: Alison Schofield <alison.schofield@intel.com>
> 
> Add a trace event for CXL Poison List Media Error Records that
> includes the starting DPA of the poison, the length, and the
> the source of the poison.
> 
> This trace event will be used by the CXL_MEM driver to log the
> Media Errors returned by the GET_POISON_LIST Mailbox command.
> 
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>

Some comments on how this is used than the actual trace point for which

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

The patch set currently just pushes through the length as provided
in the CXL record.  That is in units of 64 byte cachelines so I'd suggest
the result will be more readable if we multiply it up to be in bytes.

Also the address should be masked to the same length, but I mentioned that
at the caller.

Jonathan


> ---
>  include/trace/events/cxl.h | 60 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 60 insertions(+)
>  create mode 100644 include/trace/events/cxl.h
> 
> diff --git a/include/trace/events/cxl.h b/include/trace/events/cxl.h
> new file mode 100644
> index 000000000000..17e707c3817e
> --- /dev/null
> +++ b/include/trace/events/cxl.h
> @@ -0,0 +1,60 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM cxl
> +
> +#if !defined(_CXL_TRACE_H) ||  defined(TRACE_HEADER_MULTI_READ)
> +#define _CXL_TRACE_H
> +
> +#include <linux/tracepoint.h>
> +
> +TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_UNKNOWN);
> +TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_INTERNAL);
> +TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_EXTERNAL);
> +TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_INJECTED);
> +TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_VENDOR);
> +TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_INVALID);
> +
> +#define show_poison_source(source)					\
> +	__print_symbolic(source,					\
> +			{CXL_POISON_SOURCE_UNKNOWN,  "UNKNOWN"},	\
> +			{CXL_POISON_SOURCE_EXTERNAL, "EXTERNAL"},	\
> +			{CXL_POISON_SOURCE_INTERNAL, "INTERNAL"},	\
> +			{CXL_POISON_SOURCE_INJECTED, "INJECTED"},	\
> +			{CXL_POISON_SOURCE_VENDOR,   "VENDOR"},		\
> +			{CXL_POISON_SOURCE_INVALID,  "INVALID"})
> +
> +TRACE_EVENT(cxl_poison_list,
> +
> +	    TP_PROTO(struct device *dev,
> +		     int source,
> +		     unsigned long start,
> +		     unsigned int length),
> +
> +	    TP_ARGS(dev, source, start, length),
> +
> +	    TP_STRUCT__entry(
> +		__string(name, dev_name(dev))
> +		__field(int, source)
> +		__field(u64, start)
> +		__field(u32, length)
> +	    ),
> +
> +	    TP_fast_assign(
> +		__assign_str(name, dev_name(dev));
> +		__entry->source = source;
> +		__entry->start = start;
> +		__entry->length = length;
> +	    ),
> +
> +	    TP_printk("dev %s source %s start %llu length %u",
> +		__get_str(name),
> +		show_poison_source(__entry->source),
> +		__entry->start,
> +		__entry->length)
> +);
> +#endif /* _CXL_TRACE_H */
> +
> +/* This part must be outside protection */
> +#undef TRACE_INCLUDE_FILE
> +#define TRACE_INCLUDE_FILE cxl
> +#include <trace/define_trace.h>
Dan Williams June 17, 2022, 6:04 p.m. UTC | #4
alison.schofield@ wrote:
> From: Alison Schofield <alison.schofield@intel.com>
> 
> Add a trace event for CXL Poison List Media Error Records that
> includes the starting DPA of the poison, the length, and the
> the source of the poison.
> 
> This trace event will be used by the CXL_MEM driver to log the
> Media Errors returned by the GET_POISON_LIST Mailbox command.
> 
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> ---
>  include/trace/events/cxl.h | 60 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 60 insertions(+)
>  create mode 100644 include/trace/events/cxl.h
> 
> diff --git a/include/trace/events/cxl.h b/include/trace/events/cxl.h
> new file mode 100644
> index 000000000000..17e707c3817e
> --- /dev/null
> +++ b/include/trace/events/cxl.h
> @@ -0,0 +1,60 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM cxl
> +
> +#if !defined(_CXL_TRACE_H) ||  defined(TRACE_HEADER_MULTI_READ)
> +#define _CXL_TRACE_H
> +
> +#include <linux/tracepoint.h>
> +
> +TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_UNKNOWN);
> +TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_INTERNAL);
> +TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_EXTERNAL);
> +TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_INJECTED);
> +TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_VENDOR);
> +TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_INVALID);
> +
> +#define show_poison_source(source)					\
> +	__print_symbolic(source,					\
> +			{CXL_POISON_SOURCE_UNKNOWN,  "UNKNOWN"},	\
> +			{CXL_POISON_SOURCE_EXTERNAL, "EXTERNAL"},	\
> +			{CXL_POISON_SOURCE_INTERNAL, "INTERNAL"},	\
> +			{CXL_POISON_SOURCE_INJECTED, "INJECTED"},	\
> +			{CXL_POISON_SOURCE_VENDOR,   "VENDOR"},		\
> +			{CXL_POISON_SOURCE_INVALID,  "INVALID"})
> +
> +TRACE_EVENT(cxl_poison_list,
> +
> +	    TP_PROTO(struct device *dev,

The CXL memory device names need some decoding before other userspace
RAS tools might know what they are. So in addition to
dev_name(&cxlmd->dev) I think it would be useful to include
dev_name(cxlmd->dev.parent), i.e. the host PCI device name.

Other than that, looks good to me.

> +		     int source,
> +		     unsigned long start,
> +		     unsigned int length),
> +
> +	    TP_ARGS(dev, source, start, length),
> +
> +	    TP_STRUCT__entry(
> +		__string(name, dev_name(dev))
> +		__field(int, source)
> +		__field(u64, start)
> +		__field(u32, length)
> +	    ),
> +
> +	    TP_fast_assign(
> +		__assign_str(name, dev_name(dev));
> +		__entry->source = source;
> +		__entry->start = start;
> +		__entry->length = length;
> +	    ),
> +
> +	    TP_printk("dev %s source %s start %llu length %u",
> +		__get_str(name),
> +		show_poison_source(__entry->source),
> +		__entry->start,
> +		__entry->length)
> +);
> +#endif /* _CXL_TRACE_H */
> +
> +/* This part must be outside protection */
> +#undef TRACE_INCLUDE_FILE
> +#define TRACE_INCLUDE_FILE cxl
> +#include <trace/define_trace.h>
> -- 
> 2.31.1
>
diff mbox series

Patch

diff --git a/include/trace/events/cxl.h b/include/trace/events/cxl.h
new file mode 100644
index 000000000000..17e707c3817e
--- /dev/null
+++ b/include/trace/events/cxl.h
@@ -0,0 +1,60 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cxl
+
+#if !defined(_CXL_TRACE_H) ||  defined(TRACE_HEADER_MULTI_READ)
+#define _CXL_TRACE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_UNKNOWN);
+TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_INTERNAL);
+TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_EXTERNAL);
+TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_INJECTED);
+TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_VENDOR);
+TRACE_DEFINE_ENUM(CXL_POISON_SOURCE_INVALID);
+
+#define show_poison_source(source)					\
+	__print_symbolic(source,					\
+			{CXL_POISON_SOURCE_UNKNOWN,  "UNKNOWN"},	\
+			{CXL_POISON_SOURCE_EXTERNAL, "EXTERNAL"},	\
+			{CXL_POISON_SOURCE_INTERNAL, "INTERNAL"},	\
+			{CXL_POISON_SOURCE_INJECTED, "INJECTED"},	\
+			{CXL_POISON_SOURCE_VENDOR,   "VENDOR"},		\
+			{CXL_POISON_SOURCE_INVALID,  "INVALID"})
+
+TRACE_EVENT(cxl_poison_list,
+
+	    TP_PROTO(struct device *dev,
+		     int source,
+		     unsigned long start,
+		     unsigned int length),
+
+	    TP_ARGS(dev, source, start, length),
+
+	    TP_STRUCT__entry(
+		__string(name, dev_name(dev))
+		__field(int, source)
+		__field(u64, start)
+		__field(u32, length)
+	    ),
+
+	    TP_fast_assign(
+		__assign_str(name, dev_name(dev));
+		__entry->source = source;
+		__entry->start = start;
+		__entry->length = length;
+	    ),
+
+	    TP_printk("dev %s source %s start %llu length %u",
+		__get_str(name),
+		show_poison_source(__entry->source),
+		__entry->start,
+		__entry->length)
+);
+#endif /* _CXL_TRACE_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE cxl
+#include <trace/define_trace.h>