diff mbox series

[v2,14/14] cxl/pci: Add support to assign and clear pci_driver::cxl_err_handlers

Message ID 20241025210305.27499-15-terry.bowman@amd.com (mailing list archive)
State Superseded
Delegated to: Bjorn Helgaas
Headers show
Series Enable CXL PCIe port protocol error handling and logging | expand

Commit Message

Bowman, Terry Oct. 25, 2024, 9:03 p.m. UTC
pci_driver::cxl_err_handlers are not currently assigned handler callbacks.
The handlers can't be set in the pci_driver static definition because the
CXL PCIe port devices are bound to the portdrv driver which is not CXL
driver aware.

Add cxl_assign_port_error_handlers() in the cxl_core module. This
function will assign the default handlers for a CXL PCIe port device.

When the CXL port (cxl_port or cxl_dport) is destroyed the CXL PCIe port
device's pci_driver::cxl_err_handlers must be set to NULL to prevent future
use. Create cxl_clear_port_error_handlers() and register it to be called
when the CXL port device (cxl_port or cxl_dport) is destroyed.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
---
 drivers/cxl/core/pci.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

Comments

Jonathan Cameron Oct. 30, 2024, 4:11 p.m. UTC | #1
On Fri, 25 Oct 2024 16:03:05 -0500
Terry Bowman <terry.bowman@amd.com> wrote:

> pci_driver::cxl_err_handlers are not currently assigned handler callbacks.
> The handlers can't be set in the pci_driver static definition because the
> CXL PCIe port devices are bound to the portdrv driver which is not CXL
> driver aware.
> 
> Add cxl_assign_port_error_handlers() in the cxl_core module. This
> function will assign the default handlers for a CXL PCIe port device.
> 
> When the CXL port (cxl_port or cxl_dport) is destroyed the CXL PCIe port
> device's pci_driver::cxl_err_handlers must be set to NULL to prevent future
> use. Create cxl_clear_port_error_handlers() and register it to be called
> when the CXL port device (cxl_port or cxl_dport) is destroyed.
> 
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
One trivial comment inline. 
> ---
>  drivers/cxl/core/pci.c | 35 +++++++++++++++++++++++++++++++++++
>  1 file changed, 35 insertions(+)
> 
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index eeb4a64ba5b5..5f7570c6173c 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -839,8 +839,36 @@ static bool cxl_port_error_detected(struct pci_dev *pdev)
>  	return ue;
>  }
>  
> +static const struct cxl_error_handlers cxl_port_error_handlers = {
> +	.error_detected	= cxl_port_error_detected,
> +	.cor_error_detected	= cxl_port_cor_error_detected,
Odd spacing?  I'd just use a single space as aligning these almost
always makes for messy future patches.

> +};
> +
> +static void cxl_assign_port_error_handlers(struct pci_dev *pdev)
> +{
> +	struct pci_driver *pdrv = pdev->driver;
> +
> +	if (!pdrv)
> +		return;
> +
> +	pdrv->cxl_err_handler = &cxl_port_error_handlers;
> +}
> +
> +static void cxl_clear_port_error_handlers(void *data)
> +{
> +	struct pci_dev *pdev = data;
> +	struct pci_driver *pdrv = pdev->driver;
> +
> +	if (!pdrv)
> +		return;
> +
> +	pdrv->cxl_err_handler = NULL;
> +}
> +
>  void cxl_uport_init_ras_reporting(struct cxl_port *port)
>  {
> +	struct pci_dev *pdev = to_pci_dev(port->uport_dev);
> +
>  	/* uport may have more than 1 downstream EP. Check if already mapped. */
>  	if (port->uport_regs.ras) {
>  		dev_warn(&port->dev, "RAS is already mapped\n");
> @@ -853,6 +881,9 @@ void cxl_uport_init_ras_reporting(struct cxl_port *port)
>  		dev_err(&port->dev, "Failed to map RAS capability.\n");
>  		return;
>  	}
> +
> +	cxl_assign_port_error_handlers(pdev);
> +	devm_add_action_or_reset(port->uport_dev, cxl_clear_port_error_handlers, pdev);
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_uport_init_ras_reporting, CXL);
>  
> @@ -865,6 +896,7 @@ void cxl_dport_init_ras_reporting(struct cxl_dport *dport)
>  {
>  	struct device *dport_dev = dport->dport_dev;
>  	struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport_dev);
> +	struct pci_dev *pdev = to_pci_dev(dport_dev);
>  
>  	if (dport->rch && host_bridge->native_aer) {
>  		cxl_dport_map_rch_aer(dport);
> @@ -883,6 +915,9 @@ void cxl_dport_init_ras_reporting(struct cxl_dport *dport)
>  		dev_err(dport_dev, "Failed to map RAS capability.\n");
>  		return;
>  	}
> +
> +	cxl_assign_port_error_handlers(pdev);
> +	devm_add_action_or_reset(dport_dev, cxl_clear_port_error_handlers, pdev);
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, CXL);
>
Bowman, Terry Oct. 30, 2024, 9:34 p.m. UTC | #2
Hi Jonathan,

On 10/30/2024 11:11 AM, Jonathan Cameron wrote:
> On Fri, 25 Oct 2024 16:03:05 -0500
> Terry Bowman <terry.bowman@amd.com> wrote:
>
>> pci_driver::cxl_err_handlers are not currently assigned handler callbacks.
>> The handlers can't be set in the pci_driver static definition because the
>> CXL PCIe port devices are bound to the portdrv driver which is not CXL
>> driver aware.
>>
>> Add cxl_assign_port_error_handlers() in the cxl_core module. This
>> function will assign the default handlers for a CXL PCIe port device.
>>
>> When the CXL port (cxl_port or cxl_dport) is destroyed the CXL PCIe port
>> device's pci_driver::cxl_err_handlers must be set to NULL to prevent future
>> use. Create cxl_clear_port_error_handlers() and register it to be called
>> when the CXL port device (cxl_port or cxl_dport) is destroyed.
>>
>> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
> One trivial comment inline. 
>> ---
>>  drivers/cxl/core/pci.c | 35 +++++++++++++++++++++++++++++++++++
>>  1 file changed, 35 insertions(+)
>>
>> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
>> index eeb4a64ba5b5..5f7570c6173c 100644
>> --- a/drivers/cxl/core/pci.c
>> +++ b/drivers/cxl/core/pci.c
>> @@ -839,8 +839,36 @@ static bool cxl_port_error_detected(struct pci_dev *pdev)
>>  	return ue;
>>  }
>>  
>> +static const struct cxl_error_handlers cxl_port_error_handlers = {
>> +	.error_detected	= cxl_port_error_detected,
>> +	.cor_error_detected	= cxl_port_cor_error_detected,
> Odd spacing?  I'd just use a single space as aligning these almost
> always makes for messy future patches.

Thanks for pointing that out. I'll fix it.

Regards,
Terry

>> +};
>> +
>> +static void cxl_assign_port_error_handlers(struct pci_dev *pdev)
>> +{
>> +	struct pci_driver *pdrv = pdev->driver;
>> +
>> +	if (!pdrv)
>> +		return;
>> +
>> +	pdrv->cxl_err_handler = &cxl_port_error_handlers;
>> +}
>> +
>> +static void cxl_clear_port_error_handlers(void *data)
>> +{
>> +	struct pci_dev *pdev = data;
>> +	struct pci_driver *pdrv = pdev->driver;
>> +
>> +	if (!pdrv)
>> +		return;
>> +
>> +	pdrv->cxl_err_handler = NULL;
>> +}
>> +
>>  void cxl_uport_init_ras_reporting(struct cxl_port *port)
>>  {
>> +	struct pci_dev *pdev = to_pci_dev(port->uport_dev);
>> +
>>  	/* uport may have more than 1 downstream EP. Check if already mapped. */
>>  	if (port->uport_regs.ras) {
>>  		dev_warn(&port->dev, "RAS is already mapped\n");
>> @@ -853,6 +881,9 @@ void cxl_uport_init_ras_reporting(struct cxl_port *port)
>>  		dev_err(&port->dev, "Failed to map RAS capability.\n");
>>  		return;
>>  	}
>> +
>> +	cxl_assign_port_error_handlers(pdev);
>> +	devm_add_action_or_reset(port->uport_dev, cxl_clear_port_error_handlers, pdev);
>>  }
>>  EXPORT_SYMBOL_NS_GPL(cxl_uport_init_ras_reporting, CXL);
>>  
>> @@ -865,6 +896,7 @@ void cxl_dport_init_ras_reporting(struct cxl_dport *dport)
>>  {
>>  	struct device *dport_dev = dport->dport_dev;
>>  	struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport_dev);
>> +	struct pci_dev *pdev = to_pci_dev(dport_dev);
>>  
>>  	if (dport->rch && host_bridge->native_aer) {
>>  		cxl_dport_map_rch_aer(dport);
>> @@ -883,6 +915,9 @@ void cxl_dport_init_ras_reporting(struct cxl_dport *dport)
>>  		dev_err(dport_dev, "Failed to map RAS capability.\n");
>>  		return;
>>  	}
>> +
>> +	cxl_assign_port_error_handlers(pdev);
>> +	devm_add_action_or_reset(dport_dev, cxl_clear_port_error_handlers, pdev);
>>  }
>>  EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, CXL);
>>
diff mbox series

Patch

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index eeb4a64ba5b5..5f7570c6173c 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -839,8 +839,36 @@  static bool cxl_port_error_detected(struct pci_dev *pdev)
 	return ue;
 }
 
+static const struct cxl_error_handlers cxl_port_error_handlers = {
+	.error_detected	= cxl_port_error_detected,
+	.cor_error_detected	= cxl_port_cor_error_detected,
+};
+
+static void cxl_assign_port_error_handlers(struct pci_dev *pdev)
+{
+	struct pci_driver *pdrv = pdev->driver;
+
+	if (!pdrv)
+		return;
+
+	pdrv->cxl_err_handler = &cxl_port_error_handlers;
+}
+
+static void cxl_clear_port_error_handlers(void *data)
+{
+	struct pci_dev *pdev = data;
+	struct pci_driver *pdrv = pdev->driver;
+
+	if (!pdrv)
+		return;
+
+	pdrv->cxl_err_handler = NULL;
+}
+
 void cxl_uport_init_ras_reporting(struct cxl_port *port)
 {
+	struct pci_dev *pdev = to_pci_dev(port->uport_dev);
+
 	/* uport may have more than 1 downstream EP. Check if already mapped. */
 	if (port->uport_regs.ras) {
 		dev_warn(&port->dev, "RAS is already mapped\n");
@@ -853,6 +881,9 @@  void cxl_uport_init_ras_reporting(struct cxl_port *port)
 		dev_err(&port->dev, "Failed to map RAS capability.\n");
 		return;
 	}
+
+	cxl_assign_port_error_handlers(pdev);
+	devm_add_action_or_reset(port->uport_dev, cxl_clear_port_error_handlers, pdev);
 }
 EXPORT_SYMBOL_NS_GPL(cxl_uport_init_ras_reporting, CXL);
 
@@ -865,6 +896,7 @@  void cxl_dport_init_ras_reporting(struct cxl_dport *dport)
 {
 	struct device *dport_dev = dport->dport_dev;
 	struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport_dev);
+	struct pci_dev *pdev = to_pci_dev(dport_dev);
 
 	if (dport->rch && host_bridge->native_aer) {
 		cxl_dport_map_rch_aer(dport);
@@ -883,6 +915,9 @@  void cxl_dport_init_ras_reporting(struct cxl_dport *dport)
 		dev_err(dport_dev, "Failed to map RAS capability.\n");
 		return;
 	}
+
+	cxl_assign_port_error_handlers(pdev);
+	devm_add_action_or_reset(dport_dev, cxl_clear_port_error_handlers, pdev);
 }
 EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, CXL);