@@ -673,10 +673,13 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
void __iomem *addr;
u32 status;
u32 fe;
+ bool mh;
if (!cxlds->regs.ras)
return false;
+next_record:
+ mh = false;
addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
status = readl(addr);
if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
@@ -684,11 +687,13 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
/* If multiple errors, log header points to first error from ctrl reg */
if (hweight32(status) > 1) {
- void __iomem *rcc_addr =
- cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
-
- fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
- readl(rcc_addr)));
+ u32 capctrl = readl(cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET);
+ fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, capctrl));
+ if (FIELD_GET(CXL_RAS_CAP_CONTROL_MH_REC_CAP, capctrl)) {
+ mh = true;
+ /* Report and clear only first error */
+ status = fe;
+ }
} else {
fe = status;
}
@@ -696,6 +701,8 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
header_log_copy(cxlds, hl);
trace_cxl_aer_uncorrectable_error(dev, status, fe, hl);
writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
+ if (mh)
+ goto next_record;
return true;
}
@@ -138,6 +138,7 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
#define CXL_RAS_CORRECTABLE_MASK_MASK GENMASK(6, 0)
#define CXL_RAS_CAP_CONTROL_OFFSET 0x14
#define CXL_RAS_CAP_CONTROL_FE_MASK GENMASK(5, 0)
+#define CXL_RAS_CAP_CONTROL_MH_REC_CAP BIT(9)
#define CXL_RAS_HEADER_LOG_OFFSET 0x18
#define CXL_RAS_CAPABILITY_LENGTH 0x58
#define CXL_HEADERLOG_SIZE SZ_512
Similar to PCIe, CXL devices may support logging multiple headers corresponding to multiple errors, as reported via the CXL RAS capability. Unlike PCIe, CXL has no Multiple Header Recording Enable bit, and the CXL r3.0 specification is sparse on details. As such, the kernel should allow for any reasonable interpretation, including endpoints for which the capability bit is set and that behave as per the equivalent PCIe definitions (with the assumption that the missing 'enable bit' is set). Note that behaving as if multiple headers are being logged is also valid when they are not, so this approach should be safe with all sensible interpretations of the specification. By repeatedly attempting to clear the single bit corresponding to the reported First Error (FE), the additional header logs may be obtained; multiple attempts may be needed if the hardware tracks multiple records of the same type. Note that each trace record only records the FE in the status. We could record all status bits, as is done without the Multiple Header Recording capability, but that seemed less intuitive to me. Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> --- drivers/cxl/core/pci.c | 17 ++++++++++++----- drivers/cxl/cxl.h | 1 + 2 files changed, 13 insertions(+), 5 deletions(-)