diff mbox series

[v8,15/16] CXL/PCI: Enable CXL protocol errors during CXL Port probe

Message ID 20250327014717.2988633-16-terry.bowman@amd.com
State New
Headers show
Series Enable CXL PCIe port protocol error handling and logging | expand

Commit Message

Bowman, Terry March 27, 2025, 1:47 a.m. UTC
CXL protocol errors are not enabled for all CXL devices at boot. These
must be enabled in order to process CXL protocol errors.

Export the AER service driver's pci_aer_unmask_internal_errors().

Introduce cxl_enable_prot_errors() to call pci_aer_unmask_internal_errors().
pci_aer_unmask_internal_errors() expects pdev->aer_cap to be initialized.
However, pdev->aer_cap is not initialized for CXL Upstream Switch Ports and
CXL Downstream Switch Ports. Initialize pdev->aer_cap if necessary. Enable
AER correctable internal errors and uncorrectable internal errors for all
CXL devices.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
---
 drivers/cxl/cxl.h      |  2 ++
 drivers/cxl/port.c     | 22 ++++++++++++++++++++++
 drivers/pci/pcie/aer.c |  3 ++-
 include/linux/aer.h    |  1 +
 4 files changed, 27 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index c1adf8a3cb9e..473267c19cd0 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -769,9 +769,11 @@  struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
 #ifdef CONFIG_PCIEAER_CXL
 void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport);
 void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host);
+void cxl_enable_prot_errors(struct device *dev);
 #else
 static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport,
 						struct device *host) { }
+static inline void cxl_enable_prot_errors(struct device *dev) { }
 #endif
 
 struct cxl_decoder *to_cxl_decoder(struct device *dev);
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 8e2b70e73582..bb7a0526e609 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -83,6 +83,24 @@  static void cxl_assign_error_handlers(struct device *_dev,
 	pdrv->err_handler = handlers;
 }
 
+void cxl_enable_prot_errors(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct device *pci_dev __free(put_device) = get_device(&pdev->dev);
+
+	if (!pci_dev)
+		return;
+
+	if (!pdev->aer_cap) {
+		pdev->aer_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
+		if (!pdev->aer_cap)
+			return;
+	}
+
+	pci_aer_unmask_internal_errors(pdev);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_enable_prot_errors, "CXL");
+
 static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
 {
 	resource_size_t aer_phys;
@@ -147,6 +165,7 @@  static void cxl_uport_init_ras_reporting(struct cxl_port *port,
 	}
 
 	cxl_assign_error_handlers(&port->dev, &cxl_port_error_handlers);
+	cxl_enable_prot_errors(port->uport_dev);
 }
 
 /**
@@ -177,6 +196,7 @@  void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
 	}
 
 	cxl_assign_error_handlers(dport->dport_dev, &cxl_port_error_handlers);
+	cxl_enable_prot_errors(dport->dport_dev);
 }
 EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
 
@@ -201,6 +221,7 @@  static void cxl_endpoint_port_init_ras(struct cxl_port *port)
 	struct cxl_port *parent_port __free(put_cxl_port) =
 		cxl_mem_find_port(cxlmd, &dport);
 	struct device *cxlmd_dev __free(put_device) = &cxlmd->dev;
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 
 	if (!dport || !dev_is_pci(dport->dport_dev)) {
 		dev_err(&port->dev, "CXL port topology not found\n");
@@ -210,6 +231,7 @@  static void cxl_endpoint_port_init_ras(struct cxl_port *port)
 	cxl_dport_init_ras_reporting(dport, cxlmd_dev);
 
 	cxl_assign_error_handlers(cxlmd_dev, &cxl_ep_error_handlers);
+	cxl_enable_prot_errors(cxlds->dev);
 }
 
 #else
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 763ec6aa1a9a..d3068f5cc767 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -962,7 +962,7 @@  static bool find_source_device(struct pci_dev *parent,
  * Note: AER must be enabled and supported by the device which must be
  * checked in advance, e.g. with pcie_aer_is_native().
  */
-static void pci_aer_unmask_internal_errors(struct pci_dev *dev)
+void pci_aer_unmask_internal_errors(struct pci_dev *dev)
 {
 	int aer = dev->aer_cap;
 	u32 mask;
@@ -975,6 +975,7 @@  static void pci_aer_unmask_internal_errors(struct pci_dev *dev)
 	mask &= ~PCI_ERR_COR_INTERNAL;
 	pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask);
 }
+EXPORT_SYMBOL_NS_GPL(pci_aer_unmask_internal_errors, "CXL");
 
 static bool is_cxl_mem_dev(struct pci_dev *dev)
 {
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 8f815f34d447..a65fe324fad2 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -100,5 +100,6 @@  void pci_print_aer(struct pci_dev *dev, int aer_severity,
 int cper_severity_to_aer(int cper_severity);
 void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
 		       int severity, struct aer_capability_regs *aer_regs);
+void pci_aer_unmask_internal_errors(struct pci_dev *dev);
 #endif //_AER_H_