diff mbox series

PCI/AER: Consolidate CXL and native AER reporting paths

Message ID 8bcb8c9a7b38ce3bdaca5a64fe76f08b0b337511.1742202797.git.karolina.stolarek@oracle.com (mailing list archive)
State New
Headers show
Series PCI/AER: Consolidate CXL and native AER reporting paths | expand

Commit Message

Karolina Stolarek March 17, 2025, 10:14 a.m. UTC
Make CXL devices use aer_print_error() when reporting AER errors.
Add a helper function, populate_aer_err_info(), to populate the
aer_err_info struct before logging an error, and remove
pci_print_aer(), which it replaces. Move the struct aer_err_info
definition to the aer.h header to make it visible to CXL.

Signed-off-by: Karolina Stolarek <karolina.stolarek@oracle.com>
---
The patch was tested on top of Terry Bowman's series [1], with the
setup outlined in the cover letter, and then rebased on top of
pci-next with no functional changes.

[1] -
https://lore.kernel.org/linux-pci/20250211192444.2292833-1-terry.bowman@amd.com

 drivers/cxl/core/pci.c |  5 +++-
 drivers/pci/pci.h      | 23 ----------------
 drivers/pci/pcie/aer.c | 60 ++++++++++++++++++------------------------
 include/linux/aer.h    | 25 ++++++++++++++++--
 4 files changed, 52 insertions(+), 61 deletions(-)
diff mbox series

Patch

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 013b869b66cb..217f13c30bde 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -871,6 +871,7 @@  static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
 {
 	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
 	struct aer_capability_regs aer_regs;
+	struct aer_err_info info;
 	struct cxl_dport *dport;
 	int severity;
 
@@ -885,7 +886,9 @@  static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
 	if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
 		return;
 
-	pci_print_aer(pdev, severity, &aer_regs);
+	memset(&info, 0, sizeof(info));
+	populate_aer_err_info(&info, severity, &aer_regs);
+	aer_print_error(pdev, &info);
 
 	if (severity == AER_CORRECTABLE)
 		cxl_handle_rdport_cor_ras(cxlds, dport);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 9e0378fa30ac..b799c2ff7b85 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -561,30 +561,7 @@  static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev)
 #ifdef CONFIG_PCIEAER
 #include <linux/aer.h>
 
-#define AER_MAX_MULTI_ERR_DEVICES	5	/* Not likely to have more */
-
-struct aer_err_info {
-	struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
-	int error_dev_num;
-
-	unsigned int id:16;
-
-	unsigned int severity:2;	/* 0:NONFATAL | 1:FATAL | 2:COR */
-	unsigned int __pad1:5;
-	unsigned int multi_error_valid:1;
-
-	unsigned int first_error:5;
-	unsigned int __pad2:2;
-	unsigned int tlp_header_valid:1;
-
-	unsigned int status;		/* COR/UNCOR Error Status */
-	unsigned int mask;		/* COR/UNCOR Error Mask */
-	struct pcie_tlp_log tlp;	/* TLP Header */
-};
-
 int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
-void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
-
 int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2,
 		      unsigned int tlp_len, bool flit,
 		      struct pcie_tlp_log *log);
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index a1cf8c7ef628..411450ff981e 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -760,47 +760,33 @@  int cper_severity_to_aer(int cper_severity)
 EXPORT_SYMBOL_GPL(cper_severity_to_aer);
 #endif
 
-void pci_print_aer(struct pci_dev *dev, int aer_severity,
-		   struct aer_capability_regs *aer)
+void populate_aer_err_info(struct aer_err_info *info, int aer_severity,
+			   struct aer_capability_regs *regs)
 {
-	int layer, agent, tlp_header_valid = 0;
-	u32 status, mask;
-	struct aer_err_info info;
+	int tlp_header_valid;
+
+	info->severity = aer_severity;
+	info->first_error = PCI_ERR_CAP_FEP(regs->cap_control);
 
 	if (aer_severity == AER_CORRECTABLE) {
-		status = aer->cor_status;
-		mask = aer->cor_mask;
+		info->id = regs->cor_err_source;
+		info->status = regs->cor_status;
+		info->mask = regs->cor_mask;
 	} else {
-		status = aer->uncor_status;
-		mask = aer->uncor_mask;
-		tlp_header_valid = status & AER_LOG_TLP_MASKS;
+		info->id = regs->uncor_err_source;
+		info->status = regs->uncor_status;
+		info->mask = regs->uncor_mask;
+		tlp_header_valid = info->status & AER_LOG_TLP_MASKS;
+
+		if (tlp_header_valid) {
+			info->tlp_header_valid = 1;	/* 1-bit field: storing the raw mask would truncate */
+			info->tlp = regs->header_log;
+		}
 	}
+}
+EXPORT_SYMBOL_NS_GPL(populate_aer_err_info, "CXL");
 
-	layer = AER_GET_LAYER_ERROR(aer_severity, status);
-	agent = AER_GET_AGENT(aer_severity, status);
-
-	memset(&info, 0, sizeof(info));
-	info.severity = aer_severity;
-	info.status = status;
-	info.mask = mask;
-	info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);
-
-	pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
-	__aer_print_error(dev, &info);
-	pci_err(dev, "aer_layer=%s, aer_agent=%s\n",
-		aer_error_layer[layer], aer_agent_string[agent]);
-
-	if (aer_severity != AER_CORRECTABLE)
-		pci_err(dev, "aer_uncor_severity: 0x%08x\n",
-			aer->uncor_severity);
-
-	if (tlp_header_valid)
-		pcie_print_tlp_log(dev, &aer->header_log, dev_fmt("  "));
 
-	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
-			aer_severity, tlp_header_valid, &aer->header_log);
-}
-EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL");
 
 /**
  * add_error_device - list device to be handled
@@ -1136,6 +1122,7 @@  static void aer_recover_work_func(struct work_struct *work)
 {
 	struct aer_recover_entry entry;
 	struct pci_dev *pdev;
+	struct aer_err_info info;
 
 	while (kfifo_get(&aer_recover_ring, &entry)) {
 		pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus,
@@ -1146,7 +1133,10 @@  static void aer_recover_work_func(struct work_struct *work)
 			       PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
 			continue;
 		}
-		pci_print_aer(pdev, entry.severity, entry.regs);
+
+		memset(&info, 0, sizeof(info));
+		populate_aer_err_info(&info, entry.severity, entry.regs);
+		aer_print_error(pdev, &info);
 
 		/*
 		 * Memory for aer_capability_regs(entry.regs) is being
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 02940be66324..ab408ec18e85 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -53,6 +53,26 @@  struct aer_capability_regs {
 	u16 uncor_err_source;
 };
 
+#define AER_MAX_MULTI_ERR_DEVICES	5	/* Not likely to have more */
+struct aer_err_info {
+	struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
+	int error_dev_num;
+
+	unsigned int id:16;
+
+	unsigned int severity:2;	/* 0:NONFATAL | 1:FATAL | 2:COR */
+	unsigned int __pad1:5;
+	unsigned int multi_error_valid:1;
+
+	unsigned int first_error:5;
+	unsigned int __pad2:2;
+	unsigned int tlp_header_valid:1;
+
+	unsigned int status;		/* COR/UNCOR Error Status */
+	unsigned int mask;		/* COR/UNCOR Error Mask */
+	struct pcie_tlp_log tlp;	/* TLP Header */
+};
+
 #if defined(CONFIG_PCIEAER)
 int pci_aer_clear_nonfatal_status(struct pci_dev *dev);
 int pcie_aer_is_native(struct pci_dev *dev);
@@ -64,8 +84,9 @@  static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
 static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; }
 #endif
 
-void pci_print_aer(struct pci_dev *dev, int aer_severity,
-		    struct aer_capability_regs *aer);
+void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
+void populate_aer_err_info(struct aer_err_info *info, int aer_severity,
+			   struct aer_capability_regs *regs);
 int cper_severity_to_aer(int cper_severity);
 void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
 		       int severity, struct aer_capability_regs *aer_regs);