diff mbox series

[v8,12/16] cxl/pci: Assign CXL Port protocol error handlers

Message ID 20250327014717.2988633-13-terry.bowman@amd.com
State New
Headers show
Series Enable CXL PCIe port protocol error handling and logging | expand

Commit Message

Bowman, Terry March 27, 2025, 1:47 a.m. UTC
Introduce CXL error handlers for CXL Port devices. These are needed
to handle and log CXL protocol errors.

Update cxl_create_prot_err_info() with support for CXL Root Ports (RP), CXL
Upstream Switch Ports (USP) and CXL Downstream Switch Ports (DSP).

Add functions cxl_port_error_detected() and cxl_port_cor_error_detected().

Add cxl_assign_error_handlers() and use it to assign the CXL Port error
handlers for CXL RP, CXL USP, and CXL DSP. Make the assignments in
cxl_uport_init_ras_reporting() and cxl_dport_init_ras_reporting() after
mapping the RAS registers.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
---
 drivers/cxl/core/core.h |  2 ++
 drivers/cxl/core/pci.c  | 23 +++++++++++++
 drivers/cxl/core/port.c |  4 +--
 drivers/cxl/core/ras.c  | 76 +++++++++++++++++++++++++++++++++--------
 drivers/cxl/cxl.h       |  5 +++
 drivers/cxl/port.c      | 29 ++++++++++++++--
 6 files changed, 120 insertions(+), 19 deletions(-)
diff mbox series

Patch

diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 15699299dc11..5ce7269e5f13 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -122,6 +122,8 @@  void cxl_ras_exit(void);
 int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port);
 int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res,
 					    int nid, resource_size_t *size);
+struct cxl_port *find_cxl_port(struct device *dport_dev,
+			       struct cxl_dport **dport);
 
 #ifdef CONFIG_CXL_FEATURES
 size_t cxl_get_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 10b2abfb0e64..9ed6f700e132 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -739,6 +739,29 @@  static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds)
 
 #ifdef CONFIG_PCIEAER_CXL
 
+
+void cxl_port_cor_error_detected(struct device *cxl_dev,
+				 struct cxl_prot_error_info *err_info)
+{
+	void __iomem *ras_base = err_info->ras_base;
+	struct device *pci_dev = &err_info->pdev->dev;
+	u64 serial = 0;
+
+	__cxl_handle_cor_ras(cxl_dev, pci_dev, serial, ras_base);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_port_cor_error_detected, "CXL");
+
+pci_ers_result_t cxl_port_error_detected(struct device *cxl_dev,
+					 struct cxl_prot_error_info *err_info)
+{
+	void __iomem *ras_base = err_info->ras_base;
+	struct device *pci_dev = &err_info->pdev->dev;
+	u64 serial = 0;
+
+	return  __cxl_handle_ras(cxl_dev, pci_dev, serial, ras_base);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_port_error_detected, "CXL");
+
 static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
 					  struct cxl_dport *dport)
 {
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 0fd6646c1a2e..83d331c82d91 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -1348,8 +1348,8 @@  static struct cxl_port *__find_cxl_port(struct cxl_find_port_ctx *ctx)
 	return NULL;
 }
 
-static struct cxl_port *find_cxl_port(struct device *dport_dev,
-				      struct cxl_dport **dport)
+struct cxl_port *find_cxl_port(struct device *dport_dev,
+			       struct cxl_dport **dport)
 {
 	struct cxl_find_port_ctx ctx = {
 		.dport_dev = dport_dev,
diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
index f18cb568eabd..fe38e76f2d1a 100644
--- a/drivers/cxl/core/ras.c
+++ b/drivers/cxl/core/ras.c
@@ -110,34 +110,80 @@  static void cxl_cper_prot_err_work_fn(struct work_struct *work)
 }
 static DECLARE_WORK(cxl_cper_prot_err_work, cxl_cper_prot_err_work_fn);
 
+static int match_uport(struct device *dev, const void *data)
+{
+	const struct device *uport_dev = data;
+	struct cxl_port *port;
+
+	if (!is_cxl_port(dev))
+		return 0;
+
+	port = to_cxl_port(dev);
+
+	return port->uport_dev == uport_dev;
+}
+
 int cxl_create_prot_err_info(struct pci_dev *_pdev, int severity,
 			     struct cxl_prot_error_info *err_info)
 {
 	struct pci_dev *pdev __free(pci_dev_put) = pci_dev_get(_pdev);
-	struct cxl_dev_state *cxlds;
 
 	if (!pdev || !err_info) {
 		pr_warn_once("Error: parameter is NULL");
 		return -ENODEV;
 	}
 
-	if ((pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT) &&
-	    (pci_pcie_type(pdev) != PCI_EXP_TYPE_RC_END)) {
+	*err_info = (struct cxl_prot_error_info){ 0 };
+	err_info->severity = severity;
+	err_info->pdev = pdev;
+
+	switch (pci_pcie_type(pdev)) {
+	case PCI_EXP_TYPE_ROOT_PORT:
+	case PCI_EXP_TYPE_DOWNSTREAM:
+	{
+		struct cxl_dport *dport = NULL;
+		struct cxl_port *port __free(put_cxl_port) =
+			find_cxl_port(&pdev->dev, &dport);
+
+		if (!port || !is_cxl_port(&port->dev))
+			return -ENODEV;
+
+		err_info->ras_base = dport ? dport->regs.ras : NULL;
+		err_info->dev = &port->dev;
+		break;
+	}
+	case PCI_EXP_TYPE_UPSTREAM:
+	{
+		struct cxl_port *port;
+		struct device *port_dev __free(put_device) =
+			bus_find_device(&cxl_bus_type, NULL, &pdev->dev,
+					match_uport);
+
+		if (!port_dev || !is_cxl_port(port_dev))
+			return -ENODEV;
+
+		port = to_cxl_port(port_dev);
+		err_info->ras_base = port ? port->uport_regs.ras : NULL;
+		err_info->dev = port_dev;
+		break;
+	}
+	case PCI_EXP_TYPE_ENDPOINT:
+	case PCI_EXP_TYPE_RC_END:
+	{
+		struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+		struct cxl_memdev *cxlmd = cxlds->cxlmd;
+		struct device *dev __free(put_device) = get_device(&cxlmd->dev);
+
+		err_info->ras_base = cxlds->regs.ras;
+		err_info->dev = &cxlds->cxlmd->dev;
+		break;
+	}
+	default:
+	{
 		pci_warn_once(pdev, "Error: Unsupported device type (%X)", pci_pcie_type(pdev));
 		return -ENODEV;
 	}
-
-	cxlds = pci_get_drvdata(pdev);
-	struct device *dev __free(put_device) = get_device(&cxlds->cxlmd->dev);
-
-	if (!dev)
-		return -ENODEV;
-
-	*err_info = (struct cxl_prot_error_info){ 0 };
-	err_info->ras_base = cxlds->regs.ras;
-	err_info->severity = severity;
-	err_info->pdev = pdev;
-	err_info->dev = dev;
+	}
 
 	return 0;
 }
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 0d05d5449f97..512cc38892ed 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -810,6 +810,11 @@  struct cxl_error_handlers {
 				   struct cxl_prot_error_info *err_info);
 };
 
+void cxl_port_cor_error_detected(struct device *dev,
+				 struct cxl_prot_error_info *err_info);
+pci_ers_result_t cxl_port_error_detected(struct device *dev,
+					 struct cxl_prot_error_info *err_info);
+
 /**
  * struct cxl_endpoint_dvsec_info - Cached DVSEC info
  * @mem_enabled: cached value of mem_enabled in the DVSEC at init time
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 1b8dc161428f..30a4bdb88c31 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -60,6 +60,24 @@  static int discover_region(struct device *dev, void *root)
 
 #ifdef CONFIG_PCIEAER_CXL
 
+static const struct cxl_error_handlers cxl_port_error_handlers = {
+	.error_detected = cxl_port_error_detected,
+	.cor_error_detected = cxl_port_cor_error_detected,
+};
+
+static void cxl_assign_error_handlers(struct device *_dev,
+				      const struct cxl_error_handlers *handlers)
+{
+	struct device *dev __free(put_device) = get_device(_dev);
+	struct cxl_driver *pdrv;
+
+	if (!dev)
+		return;
+
+	pdrv = to_cxl_drv(dev->driver);
+	pdrv->err_handler = handlers;
+}
+
 static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
 {
 	resource_size_t aer_phys;
@@ -118,8 +136,12 @@  static void cxl_uport_init_ras_reporting(struct cxl_port *port,
 
 	map->host = host;
 	if (cxl_map_component_regs(map, &port->uport_regs,
-				   BIT(CXL_CM_CAP_CAP_ID_RAS)))
+				   BIT(CXL_CM_CAP_CAP_ID_RAS))) {
 		dev_dbg(&port->dev, "Failed to map RAS capability\n");
+		return;
+	}
+
+	cxl_assign_error_handlers(&port->dev, &cxl_port_error_handlers);
 }
 
 /**
@@ -144,9 +166,12 @@  void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
 	}
 
 	if (cxl_map_component_regs(&dport->reg_map, &dport->regs.component,
-				   BIT(CXL_CM_CAP_CAP_ID_RAS)))
+				   BIT(CXL_CM_CAP_CAP_ID_RAS))) {
 		dev_dbg(dport->dport_dev, "Failed to map RAS capability\n");
+		return;
+	}
 
+	cxl_assign_error_handlers(dport->dport_dev, &cxl_port_error_handlers);
 }
 EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");