diff mbox series

[1/1] cxl/events: Fix Trace DRAM Event Record

Message ID 20241014143003.1170-1-shiju.jose@huawei.com
State Accepted
Commit 53ab8678e7180834be29cf56cd52825fc3427c02
Headers show
Series [1/1] cxl/events: Fix Trace DRAM Event Record | expand

Commit Message

Shiju Jose Oct. 14, 2024, 2:30 p.m. UTC
From: Shiju Jose <shiju.jose@huawei.com>

CXL spec rev 3.0 section 8.2.9.2.1.2 defines the DRAM Event Record.

Fix the decoding of the memory event type field of the DRAM Event Record.
For example, a value of 0x1 is currently reported as Invalid Address
(General Media Event Record - Memory Event Type) instead of Scrub Media
ECC Error (DRAM Event Record - Memory Event Type), and so on.

Fixes: 2d6c1e6d60ba ("cxl/mem: Trace DRAM Event Record")
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
Updates for event records in CXL spec r3.1 will follow shortly.

 drivers/cxl/core/trace.h | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

Comments

Ira Weiny Oct. 14, 2024, 2:50 p.m. UTC | #1
shiju.jose@ wrote:
> From: Shiju Jose <shiju.jose@huawei.com>
> 
> CXL spec rev 3.0 section 8.2.9.2.1.2 defines the DRAM Event Record.
> 
> Fix decode memory event type field of DRAM Event Record.
> For e.g. if value is 0x1 it will be reported as an Invalid Address
> (General Media Event Record - Memory Event Type) instead of Scrub Media
> ECC Error (DRAM Event Record - Memory Event Type) and so on.
> 
> Fixes: 2d6c1e6d60ba ("cxl/mem: Trace DRAM Event Record")

I assume this is causing issues with user space?

If so I will add...

Cc: <stable@vger.kernel.org> # 6.3.x

... and queue this up in cxl-fixes.

Reviewed-by: Ira Weiny <ira.weiny@intel.com>

> Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
> ---
> Updates for event records in CXL spec r3.1 will follow shortly.
> 
>  drivers/cxl/core/trace.h | 17 ++++++++++++++---
>  1 file changed, 14 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
> index 9167cfba7f59..cdffebcf20a4 100644
> --- a/drivers/cxl/core/trace.h
> +++ b/drivers/cxl/core/trace.h
> @@ -279,7 +279,7 @@ TRACE_EVENT(cxl_generic_event,
>  #define CXL_GMER_MEM_EVT_TYPE_ECC_ERROR			0x00
>  #define CXL_GMER_MEM_EVT_TYPE_INV_ADDR			0x01
>  #define CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR		0x02
> -#define show_mem_event_type(type)	__print_symbolic(type,			\
> +#define show_gmer_mem_event_type(type)	__print_symbolic(type,			\
>  	{ CXL_GMER_MEM_EVT_TYPE_ECC_ERROR,		"ECC Error" },		\
>  	{ CXL_GMER_MEM_EVT_TYPE_INV_ADDR,		"Invalid Address" },	\
>  	{ CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR,	"Data Path Error" }	\
> @@ -373,7 +373,7 @@ TRACE_EVENT(cxl_general_media,
>  		"hpa=%llx region=%s region_uuid=%pUb",
>  		__entry->dpa, show_dpa_flags(__entry->dpa_flags),
>  		show_event_desc_flags(__entry->descriptor),
> -		show_mem_event_type(__entry->type),
> +		show_gmer_mem_event_type(__entry->type),
>  		show_trans_type(__entry->transaction_type),
>  		__entry->channel, __entry->rank, __entry->device,
>  		__print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE),
> @@ -391,6 +391,17 @@ TRACE_EVENT(cxl_general_media,
>   * DRAM Event Record defines many fields the same as the General Media Event
>   * Record.  Reuse those definitions as appropriate.
>   */
> +#define CXL_DER_MEM_EVT_TYPE_ECC_ERROR			0x00
> +#define CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR	0x01
> +#define CXL_DER_MEM_EVT_TYPE_INV_ADDR			0x02
> +#define CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR		0x03
> +#define show_dram_mem_event_type(type)  __print_symbolic(type,				\
> +	{ CXL_DER_MEM_EVT_TYPE_ECC_ERROR,		"ECC Error" },			\
> +	{ CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR,	"Scrub Media ECC Error" },	\
> +	{ CXL_DER_MEM_EVT_TYPE_INV_ADDR,		"Invalid Address" },		\
> +	{ CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR,		"Data Path Error" }		\
> +)
> +
>  #define CXL_DER_VALID_CHANNEL				BIT(0)
>  #define CXL_DER_VALID_RANK				BIT(1)
>  #define CXL_DER_VALID_NIBBLE				BIT(2)
> @@ -477,7 +488,7 @@ TRACE_EVENT(cxl_dram,
>  		"hpa=%llx region=%s region_uuid=%pUb",
>  		__entry->dpa, show_dpa_flags(__entry->dpa_flags),
>  		show_event_desc_flags(__entry->descriptor),
> -		show_mem_event_type(__entry->type),
> +		show_dram_mem_event_type(__entry->type),
>  		show_trans_type(__entry->transaction_type),
>  		__entry->channel, __entry->rank, __entry->nibble_mask,
>  		__entry->bank_group, __entry->bank,
> -- 
> 2.34.1
>
Shiju Jose Oct. 14, 2024, 3:16 p.m. UTC | #2
>-----Original Message-----
>From: Ira Weiny <ira.weiny@intel.com>
>Sent: 14 October 2024 15:51
>To: Shiju Jose <shiju.jose@huawei.com>; dave.jiang@intel.com;
>dan.j.williams@intel.com; Jonathan Cameron
><jonathan.cameron@huawei.com>; alison.schofield@intel.com;
>vishal.l.verma@intel.com; ira.weiny@intel.com; dave@stgolabs.net; linux-
>cxl@vger.kernel.org
>Cc: Linuxarm <linuxarm@huawei.com>; tanxiaofei <tanxiaofei@huawei.com>;
>Zengtao (B) <prime.zeng@hisilicon.com>; Shiju Jose <shiju.jose@huawei.com>
>Subject: Re: [PATCH 1/1] cxl/events: Fix Trace DRAM Event Record
>
>shiju.jose@ wrote:
>> From: Shiju Jose <shiju.jose@huawei.com>
>>
>> CXL spec rev 3.0 section 8.2.9.2.1.2 defines the DRAM Event Record.
>>
>> Fix decode memory event type field of DRAM Event Record.
>> For e.g. if value is 0x1 it will be reported as an Invalid Address
>> (General Media Event Record - Memory Event Type) instead of Scrub
>> Media ECC Error (DRAM Event Record - Memory Event Type) and so on.
>>
>> Fixes: 2d6c1e6d60ba ("cxl/mem: Trace DRAM Event Record")
>
>I assume this is causing issues with user space?
This change directly affects only the kernel logging of the memory event type in the DRAM event record.
However, a similar fix is needed in rasdaemon too, because I referred to the kernel code for the
CXL event logging part in user space.
>
>If so I will add...
>
>Cc: <stable@vger.kernel.org> # 6.3.x
>
>... and queue this up in cxl-fixes.
>
>Reviewed-by: Ira Weiny <ira.weiny@intel.com>
>
>> Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
>> ---
>> Updates for event records in CXL spec r3.1 will follow shortly.
>>
>>  drivers/cxl/core/trace.h | 17 ++++++++++++++---
>>  1 file changed, 14 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index
>> 9167cfba7f59..cdffebcf20a4 100644
>> --- a/drivers/cxl/core/trace.h
>> +++ b/drivers/cxl/core/trace.h
>> @@ -279,7 +279,7 @@ TRACE_EVENT(cxl_generic_event,
>>  #define CXL_GMER_MEM_EVT_TYPE_ECC_ERROR			0x00
>>  #define CXL_GMER_MEM_EVT_TYPE_INV_ADDR			0x01
>>  #define CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR		0x02
>> -#define show_mem_event_type(type)	__print_symbolic(type,
>	\
>> +#define show_gmer_mem_event_type(type)	__print_symbolic(type,
>		\
>>  	{ CXL_GMER_MEM_EVT_TYPE_ECC_ERROR,		"ECC Error" },
>		\
>>  	{ CXL_GMER_MEM_EVT_TYPE_INV_ADDR,		"Invalid
>Address" },	\
>>  	{ CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR,	"Data Path
>Error" }	\
>> @@ -373,7 +373,7 @@ TRACE_EVENT(cxl_general_media,
>>  		"hpa=%llx region=%s region_uuid=%pUb",
>>  		__entry->dpa, show_dpa_flags(__entry->dpa_flags),
>>  		show_event_desc_flags(__entry->descriptor),
>> -		show_mem_event_type(__entry->type),
>> +		show_gmer_mem_event_type(__entry->type),
>>  		show_trans_type(__entry->transaction_type),
>>  		__entry->channel, __entry->rank, __entry->device,
>>  		__print_hex(__entry->comp_id,
>CXL_EVENT_GEN_MED_COMP_ID_SIZE), @@
>> -391,6 +391,17 @@ TRACE_EVENT(cxl_general_media,
>>   * DRAM Event Record defines many fields the same as the General Media
>Event
>>   * Record.  Reuse those definitions as appropriate.
>>   */
>> +#define CXL_DER_MEM_EVT_TYPE_ECC_ERROR			0x00
>> +#define CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR	0x01
>> +#define CXL_DER_MEM_EVT_TYPE_INV_ADDR			0x02
>> +#define CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR		0x03
>> +#define show_dram_mem_event_type(type)  __print_symbolic(type,
>			\
>> +	{ CXL_DER_MEM_EVT_TYPE_ECC_ERROR,		"ECC Error" },
>			\
>> +	{ CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR,	"Scrub
>Media ECC Error" },	\
>> +	{ CXL_DER_MEM_EVT_TYPE_INV_ADDR,		"Invalid
>Address" },		\
>> +	{ CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR,		"Data
>Path Error" }		\
>> +)
>> +
>>  #define CXL_DER_VALID_CHANNEL				BIT(0)
>>  #define CXL_DER_VALID_RANK				BIT(1)
>>  #define CXL_DER_VALID_NIBBLE				BIT(2)
>> @@ -477,7 +488,7 @@ TRACE_EVENT(cxl_dram,
>>  		"hpa=%llx region=%s region_uuid=%pUb",
>>  		__entry->dpa, show_dpa_flags(__entry->dpa_flags),
>>  		show_event_desc_flags(__entry->descriptor),
>> -		show_mem_event_type(__entry->type),
>> +		show_dram_mem_event_type(__entry->type),
>>  		show_trans_type(__entry->transaction_type),
>>  		__entry->channel, __entry->rank, __entry->nibble_mask,
>>  		__entry->bank_group, __entry->bank,
>> --
>> 2.34.1
>>
>
>
Thanks,
Shiju
diff mbox series

Patch

diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
index 9167cfba7f59..cdffebcf20a4 100644
--- a/drivers/cxl/core/trace.h
+++ b/drivers/cxl/core/trace.h
@@ -279,7 +279,7 @@  TRACE_EVENT(cxl_generic_event,
 #define CXL_GMER_MEM_EVT_TYPE_ECC_ERROR			0x00
 #define CXL_GMER_MEM_EVT_TYPE_INV_ADDR			0x01
 #define CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR		0x02
-#define show_mem_event_type(type)	__print_symbolic(type,			\
+#define show_gmer_mem_event_type(type)	__print_symbolic(type,			\
 	{ CXL_GMER_MEM_EVT_TYPE_ECC_ERROR,		"ECC Error" },		\
 	{ CXL_GMER_MEM_EVT_TYPE_INV_ADDR,		"Invalid Address" },	\
 	{ CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR,	"Data Path Error" }	\
@@ -373,7 +373,7 @@  TRACE_EVENT(cxl_general_media,
 		"hpa=%llx region=%s region_uuid=%pUb",
 		__entry->dpa, show_dpa_flags(__entry->dpa_flags),
 		show_event_desc_flags(__entry->descriptor),
-		show_mem_event_type(__entry->type),
+		show_gmer_mem_event_type(__entry->type),
 		show_trans_type(__entry->transaction_type),
 		__entry->channel, __entry->rank, __entry->device,
 		__print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE),
@@ -391,6 +391,17 @@  TRACE_EVENT(cxl_general_media,
  * DRAM Event Record defines many fields the same as the General Media Event
  * Record.  Reuse those definitions as appropriate.
  */
+#define CXL_DER_MEM_EVT_TYPE_ECC_ERROR			0x00
+#define CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR	0x01
+#define CXL_DER_MEM_EVT_TYPE_INV_ADDR			0x02
+#define CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR		0x03
+#define show_dram_mem_event_type(type)  __print_symbolic(type,				\
+	{ CXL_DER_MEM_EVT_TYPE_ECC_ERROR,		"ECC Error" },			\
+	{ CXL_DER_MEM_EVT_TYPE_SCRUB_MEDIA_ECC_ERROR,	"Scrub Media ECC Error" },	\
+	{ CXL_DER_MEM_EVT_TYPE_INV_ADDR,		"Invalid Address" },		\
+	{ CXL_DER_MEM_EVT_TYPE_DATA_PATH_ERROR,		"Data Path Error" }		\
+)
+
 #define CXL_DER_VALID_CHANNEL				BIT(0)
 #define CXL_DER_VALID_RANK				BIT(1)
 #define CXL_DER_VALID_NIBBLE				BIT(2)
@@ -477,7 +488,7 @@  TRACE_EVENT(cxl_dram,
 		"hpa=%llx region=%s region_uuid=%pUb",
 		__entry->dpa, show_dpa_flags(__entry->dpa_flags),
 		show_event_desc_flags(__entry->descriptor),
-		show_mem_event_type(__entry->type),
+		show_dram_mem_event_type(__entry->type),
 		show_trans_type(__entry->transaction_type),
 		__entry->channel, __entry->rank, __entry->nibble_mask,
 		__entry->bank_group, __entry->bank,