diff mbox series

[v4,07/15] PCI/AER: Add CXL PCIe Port Uncorrectable Error recovery in AER service driver

Message ID 20241211234002.3728674-8-terry.bowman@amd.com (mailing list archive)
State New
Delegated to: Bjorn Helgaas
Headers show
Series Enable CXL PCIe Port protocol error handling and logging | expand

Commit Message

Bowman, Terry Dec. 11, 2024, 11:39 p.m. UTC
Existing recovery procedure for PCIe Uncorrectable Errors (UCE) does not
apply to CXL devices. Recovery can not be used for CXL devices because of
potential corruption on what can be system memory. Also, current PCIe UCE
recovery, in the case of a Root Port (RP) or Downstream Switch Port (DSP),
does not begin at the RP/DSP but begins at the first downstream device.
This will miss handling CXL Protocol Errors in a CXL RP or DSP. A separate
CXL recovery is needed because of the different handling requirements

Add a new function, cxl_do_recovery() using the following.

Add cxl_walk_bridge() to iterate the detected error's sub-topology.
cxl_walk_bridge() is similar to pci_walk_bridge() but the CXL flavor
will begin iteration at the RP or DSP rather than beginning at the
first downstream device.

Add cxl_report_error_detected() as an analog to report_error_detected().
It will call pci_driver::cxl_err_handlers for each iterated downstream
device. The pci_driver::cxl_err_handler's UCE handler returns a boolean
indicating if there was a UCE error detected during handling.

cxl_do_recovery() uses the status from cxl_report_error_detected() to
determine how to proceed. Non-fatal CXL UCE errors will be treated as
fatal. If a UCE was present during handling then cxl_do_recovery()
will kernel panic.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
---
 drivers/pci/pci.h      |  3 +++
 drivers/pci/pcie/aer.c |  4 ++++
 drivers/pci/pcie/err.c | 54 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 61 insertions(+)

Comments

Alejandro Lucero Palau Dec. 12, 2024, 9:28 a.m. UTC | #1
On 12/11/24 23:39, Terry Bowman wrote:
> Existing recovery procedure for PCIe Uncorrectable Errors (UCE) does not
> apply to CXL devices. Recovery can not be used for CXL devices because of
> potential corruption on what can be system memory. Also, current PCIe UCE
> recovery, in the case of a Root Port (RP) or Downstream Switch Port (DSP),
> does not begin at the RP/DSP but begins at the first downstream device.
> This will miss handling CXL Protocol Errors in a CXL RP or DSP. A separate
> CXL recovery is needed because of the different handling requirements
>
> Add a new function, cxl_do_recovery() using the following.
>
> Add cxl_walk_bridge() to iterate the detected error's sub-topology.
> cxl_walk_bridge() is similar to pci_walk_bridge() but the CXL flavor
> will begin iteration at the RP or DSP rather than beginning at the
> first downstream device.
>
> Add cxl_report_error_detected() as an analog to report_error_detected().
> It will call pci_driver::cxl_err_handlers for each iterated downstream
> device. The pci_driver::cxl_err_handler's UCE handler returns a boolean
> indicating if there was a UCE error detected during handling.
>
> cxl_do_recovery() uses the status from cxl_report_error_detected() to
> determine how to proceed. Non-fatal CXL UCE errors will be treated as
> fatal. If a UCE was present during handling then cxl_do_recovery()
> will kernel panic.
>
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
> ---
>   drivers/pci/pci.h      |  3 +++
>   drivers/pci/pcie/aer.c |  4 ++++
>   drivers/pci/pcie/err.c | 54 ++++++++++++++++++++++++++++++++++++++++++
>   3 files changed, 61 insertions(+)
>
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index 14d00ce45bfa..5a67e41919d8 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -658,6 +658,9 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
>   		pci_channel_state_t state,
>   		pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev));
>   
> +/* CXL error reporting and handling */
> +void cxl_do_recovery(struct pci_dev *dev);
> +
>   bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
>   int pcie_retrain_link(struct pci_dev *pdev, bool use_lt);
>   
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index c1eb939c1cca..861521872318 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -1024,6 +1024,8 @@ static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
>   			err_handler->error_detected(dev, pci_channel_io_normal);
>   		else if (info->severity == AER_FATAL)
>   			err_handler->error_detected(dev, pci_channel_io_frozen);
> +
> +		cxl_do_recovery(dev);
>   	}
>   out:
>   	device_unlock(&dev->dev);
> @@ -1048,6 +1050,8 @@ static void cxl_handle_error(struct pci_dev *dev, struct aer_err_info *info)
>   			pdrv->cxl_err_handler->cor_error_detected(dev);
>   
>   		pcie_clear_device_status(dev);
> +	} else {
> +		cxl_do_recovery(dev);
>   	}
>   }
>   
> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
> index 31090770fffc..6f7cf5e0087f 100644
> --- a/drivers/pci/pcie/err.c
> +++ b/drivers/pci/pcie/err.c
> @@ -276,3 +276,57 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
>   
>   	return status;
>   }
> +
> +static void cxl_walk_bridge(struct pci_dev *bridge,
> +			    int (*cb)(struct pci_dev *, void *),
> +			    void *userdata)
> +{
> +	bool *status = userdata;
> +
> +	cb(bridge, status);
> +	if (bridge->subordinate && !*status)


I would prefer to use not a pointer for status as you are not changing 
what it points to here, so first a cast then using just !status in the 
conditional.


> +		pci_walk_bus(bridge->subordinate, cb, status);
> +}
> +
> +static int cxl_report_error_detected(struct pci_dev *dev, void *data)
> +{
> +	struct pci_driver *pdrv = dev->driver;
> +	bool *status = data;
> +
> +	device_lock(&dev->dev);
> +	if (pdrv && pdrv->cxl_err_handler &&
> +	    pdrv->cxl_err_handler->error_detected) {
> +		const struct cxl_error_handlers *cxl_err_handler =
> +			pdrv->cxl_err_handler;
> +		*status |= cxl_err_handler->error_detected(dev);


This implies status should not be a bool pointer as different bits can 
be set by the returning value, but as the code seems to only care about 
any bit implying an error and therefore error detected, I guess that is 
fine. However, the next function calling this one is using an int ...


Confusing to me. I would expect here not an OR but returning just when a 
first error is detected, handling the lock properly, with the walk 
function behind the scenes breaking the walk if the return is anything 
other than zero.


> +	}
> +	device_unlock(&dev->dev);
> +	return *status;
> +}
> +
> +void cxl_do_recovery(struct pci_dev *dev)
> +{
> +	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
> +	int type = pci_pcie_type(dev);
> +	struct pci_dev *bridge;
> +	int status;
> +
> +	if (type == PCI_EXP_TYPE_ROOT_PORT ||
> +	    type == PCI_EXP_TYPE_DOWNSTREAM ||
> +	    type == PCI_EXP_TYPE_UPSTREAM ||
> +	    type == PCI_EXP_TYPE_ENDPOINT)
> +		bridge = dev;
> +	else
> +		bridge = pci_upstream_bridge(dev);
> +
> +	cxl_walk_bridge(bridge, cxl_report_error_detected, &status);
> +	if (status)
> +		panic("CXL cachemem error.");
> +
> +	if (host->native_aer || pcie_ports_native) {
> +		pcie_clear_device_status(dev);
> +		pci_aer_clear_nonfatal_status(dev);
> +	}
> +
> +	pci_info(bridge, "CXL uncorrectable error.\n");
> +}
Bowman, Terry Dec. 13, 2024, 3:07 p.m. UTC | #2
On 12/12/2024 3:28 AM, Alejandro Lucero Palau wrote:
> On 12/11/24 23:39, Terry Bowman wrote:
>> Existing recovery procedure for PCIe Uncorrectable Errors (UCE) does not
>> apply to CXL devices. Recovery can not be used for CXL devices because of
>> potential corruption on what can be system memory. Also, current PCIe UCE
>> recovery, in the case of a Root Port (RP) or Downstream Switch Port (DSP),
>> does not begin at the RP/DSP but begins at the first downstream device.
>> This will miss handling CXL Protocol Errors in a CXL RP or DSP. A separate
>> CXL recovery is needed because of the different handling requirements
>>
>> Add a new function, cxl_do_recovery() using the following.
>>
>> Add cxl_walk_bridge() to iterate the detected error's sub-topology.
>> cxl_walk_bridge() is similar to pci_walk_bridge() but the CXL flavor
>> will begin iteration at the RP or DSP rather than beginning at the
>> first downstream device.
>>
>> Add cxl_report_error_detected() as an analog to report_error_detected().
>> It will call pci_driver::cxl_err_handlers for each iterated downstream
>> device. The pci_driver::cxl_err_handler's UCE handler returns a boolean
>> indicating if there was a UCE error detected during handling.
>>
>> cxl_do_recovery() uses the status from cxl_report_error_detected() to
>> determine how to proceed. Non-fatal CXL UCE errors will be treated as
>> fatal. If a UCE was present during handling then cxl_do_recovery()
>> will kernel panic.
>>
>> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>> ---
>>   drivers/pci/pci.h      |  3 +++
>>   drivers/pci/pcie/aer.c |  4 ++++
>>   drivers/pci/pcie/err.c | 54 ++++++++++++++++++++++++++++++++++++++++++
>>   3 files changed, 61 insertions(+)
>>
>> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
>> index 14d00ce45bfa..5a67e41919d8 100644
>> --- a/drivers/pci/pci.h
>> +++ b/drivers/pci/pci.h
>> @@ -658,6 +658,9 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
>>   		pci_channel_state_t state,
>>   		pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev));
>>   
>> +/* CXL error reporting and handling */
>> +void cxl_do_recovery(struct pci_dev *dev);
>> +
>>   bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
>>   int pcie_retrain_link(struct pci_dev *pdev, bool use_lt);
>>   
>> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
>> index c1eb939c1cca..861521872318 100644
>> --- a/drivers/pci/pcie/aer.c
>> +++ b/drivers/pci/pcie/aer.c
>> @@ -1024,6 +1024,8 @@ static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
>>   			err_handler->error_detected(dev, pci_channel_io_normal);
>>   		else if (info->severity == AER_FATAL)
>>   			err_handler->error_detected(dev, pci_channel_io_frozen);
>> +
>> +		cxl_do_recovery(dev);
>>   	}
>>   out:
>>   	device_unlock(&dev->dev);
>> @@ -1048,6 +1050,8 @@ static void cxl_handle_error(struct pci_dev *dev, struct aer_err_info *info)
>>   			pdrv->cxl_err_handler->cor_error_detected(dev);
>>   
>>   		pcie_clear_device_status(dev);
>> +	} else {
>> +		cxl_do_recovery(dev);
>>   	}
>>   }
>>   
>> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
>> index 31090770fffc..6f7cf5e0087f 100644
>> --- a/drivers/pci/pcie/err.c
>> +++ b/drivers/pci/pcie/err.c
>> @@ -276,3 +276,57 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
>>   
>>   	return status;
>>   }
>> +
>> +static void cxl_walk_bridge(struct pci_dev *bridge,
>> +			    int (*cb)(struct pci_dev *, void *),
>> +			    void *userdata)
>> +{
>> +	bool *status = userdata;
>> +
>> +	cb(bridge, status);
>> +	if (bridge->subordinate && !*status)
>
> I would prefer to use not a pointer for status as you are not changing 
> what it points to here, so first a cast then using just !status in the 
> conditional.
>

Hi Alejandro,

I agree. This can be significantly improved. I'll remove the condition on
'status' and instead introduce condition on cb() and pci_walk_bus() return value.
But, 'status' does need to be a pointer so the caller (cxl_do_recovery()) can
determine if to invoke panic.

>> +		pci_walk_bus(bridge->subordinate, cb, status);
>> +}
>> +
>> +static int cxl_report_error_detected(struct pci_dev *dev, void *data)
>> +{
>> +	struct pci_driver *pdrv = dev->driver;
>> +	bool *status = data;
>> +
>> +	device_lock(&dev->dev);
>> +	if (pdrv && pdrv->cxl_err_handler &&
>> +	    pdrv->cxl_err_handler->error_detected) {
>> +		const struct cxl_error_handlers *cxl_err_handler =
>> +			pdrv->cxl_err_handler;
>> +		*status |= cxl_err_handler->error_detected(dev);
>
> This implies status should not be a bool pointer as different bits can 
> be set by the returning value, but as the code seems to only care about 
> any bit implying an error and therefore error detected, I guess that is 
> fine. However, the next function calling this one is using an int ...
>
>
> Confusing to me. I would expect here not an OR but returning just when a 
> first error is detected, handling the lock properly, with the walk 
> function behind the scenes breaking the walk if the return is anything 
> other than zero.

The cxl_err_handler->error_detected() return value is a bool. But, The bitwise OR is not necessary. I'll refactor as part of mentioned above.

Thanks for the feedback.

-Terry

>
>
>> +	}
>> +	device_unlock(&dev->dev);
>> +	return *status;
>> +}
>> +
>> +void cxl_do_recovery(struct pci_dev *dev)
>> +{
>> +	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
>> +	int type = pci_pcie_type(dev);
>> +	struct pci_dev *bridge;
>> +	int status;
>> +
>> +	if (type == PCI_EXP_TYPE_ROOT_PORT ||
>> +	    type == PCI_EXP_TYPE_DOWNSTREAM ||
>> +	    type == PCI_EXP_TYPE_UPSTREAM ||
>> +	    type == PCI_EXP_TYPE_ENDPOINT)
>> +		bridge = dev;
>> +	else
>> +		bridge = pci_upstream_bridge(dev);
>> +
>> +	cxl_walk_bridge(bridge, cxl_report_error_detected, &status);
>> +	if (status)
>> +		panic("CXL cachemem error.");
>> +
>> +	if (host->native_aer || pcie_ports_native) {
>> +		pcie_clear_device_status(dev);
>> +		pci_aer_clear_nonfatal_status(dev);
>> +	}
>> +
>> +	pci_info(bridge, "CXL uncorrectable error.\n");
>> +}
diff mbox series

Patch

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 14d00ce45bfa..5a67e41919d8 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -658,6 +658,9 @@  pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
 		pci_channel_state_t state,
 		pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev));
 
+/* CXL error reporting and handling */
+void cxl_do_recovery(struct pci_dev *dev);
+
 bool pcie_wait_for_link(struct pci_dev *pdev, bool active);
 int pcie_retrain_link(struct pci_dev *pdev, bool use_lt);
 
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index c1eb939c1cca..861521872318 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1024,6 +1024,8 @@  static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
 			err_handler->error_detected(dev, pci_channel_io_normal);
 		else if (info->severity == AER_FATAL)
 			err_handler->error_detected(dev, pci_channel_io_frozen);
+
+		cxl_do_recovery(dev);
 	}
 out:
 	device_unlock(&dev->dev);
@@ -1048,6 +1050,8 @@  static void cxl_handle_error(struct pci_dev *dev, struct aer_err_info *info)
 			pdrv->cxl_err_handler->cor_error_detected(dev);
 
 		pcie_clear_device_status(dev);
+	} else {
+		cxl_do_recovery(dev);
 	}
 }
 
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 31090770fffc..6f7cf5e0087f 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -276,3 +276,57 @@  pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
 
 	return status;
 }
+
+static void cxl_walk_bridge(struct pci_dev *bridge,
+			    int (*cb)(struct pci_dev *, void *),
+			    void *userdata)
+{
+	bool *status = userdata;
+
+	cb(bridge, status);
+	if (bridge->subordinate && !*status)
+		pci_walk_bus(bridge->subordinate, cb, status);
+}
+
+static int cxl_report_error_detected(struct pci_dev *dev, void *data)
+{
+	struct pci_driver *pdrv = dev->driver;
+	bool *status = data;
+
+	device_lock(&dev->dev);
+	if (pdrv && pdrv->cxl_err_handler &&
+	    pdrv->cxl_err_handler->error_detected) {
+		const struct cxl_error_handlers *cxl_err_handler =
+			pdrv->cxl_err_handler;
+		*status |= cxl_err_handler->error_detected(dev);
+	}
+	device_unlock(&dev->dev);
+	return *status;
+}
+
+void cxl_do_recovery(struct pci_dev *dev)
+{
+	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
+	int type = pci_pcie_type(dev);
+	struct pci_dev *bridge;
+	int status;
+
+	if (type == PCI_EXP_TYPE_ROOT_PORT ||
+	    type == PCI_EXP_TYPE_DOWNSTREAM ||
+	    type == PCI_EXP_TYPE_UPSTREAM ||
+	    type == PCI_EXP_TYPE_ENDPOINT)
+		bridge = dev;
+	else
+		bridge = pci_upstream_bridge(dev);
+
+	cxl_walk_bridge(bridge, cxl_report_error_detected, &status);
+	if (status)
+		panic("CXL cachemem error.");
+
+	if (host->native_aer || pcie_ports_native) {
+		pcie_clear_device_status(dev);
+		pci_aer_clear_nonfatal_status(dev);
+	}
+
+	pci_info(bridge, "CXL uncorrectable error.\n");
+}