@@ -572,7 +572,7 @@ struct rcec_ea {
void pci_save_dpc_state(struct pci_dev *dev);
void pci_restore_dpc_state(struct pci_dev *dev);
void pci_dpc_init(struct pci_dev *pdev);
-void dpc_process_error(struct pci_dev *pdev);
+struct pci_dev *dpc_process_error(struct pci_dev *pdev);
pci_ers_result_t dpc_reset_link(struct pci_dev *pdev);
bool pci_dpc_recovered(struct pci_dev *pdev);
unsigned int dpc_tlp_log_len(struct pci_dev *dev);
@@ -253,10 +253,20 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
return 1;
}
-void dpc_process_error(struct pci_dev *pdev)
+/**
+ * dpc_process_error - handle the DPC error status
+ * @pdev: the port that experienced the containment event
+ *
+ * Return: the device that detected the error. May be NULL if no device
+ * matches the Source ID (NOTE(review): confirm that callers handle NULL).
+ *
+ * NOTE: The reference count of the returned device is incremented; the caller
+ * must drop it by calling pci_dev_put().
+ */
+struct pci_dev *dpc_process_error(struct pci_dev *pdev)
{
u16 cap = pdev->dpc_cap, status, source, reason, ext_reason;
struct aer_err_info info;
+ struct pci_dev *err_dev;
pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source);
@@ -279,6 +289,13 @@ void dpc_process_error(struct pci_dev *pdev)
"software trigger" :
"reserved error");
+ if (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE ||
+ reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE)
+ err_dev = pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus),
+ PCI_BUS_NUM(source), source & 0xff);
+ else
+ err_dev = pci_dev_get(pdev);
+
/* show RP PIO error detail information */
if (pdev->dpc_rp_extensions &&
reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT &&
@@ -291,6 +308,8 @@ void dpc_process_error(struct pci_dev *pdev)
pci_aer_clear_nonfatal_status(pdev);
pci_aer_clear_fatal_status(pdev);
}
+
+ return err_dev;
}
static void pci_clear_surpdn_errors(struct pci_dev *pdev)
@@ -346,7 +365,7 @@ static bool dpc_is_surprise_removal(struct pci_dev *pdev)
static irqreturn_t dpc_handler(int irq, void *context)
{
- struct pci_dev *err_port = context;
+ struct pci_dev *err_port = context, *err_dev;
/*
* According to PCIe r6.0 sec 6.7.6, errors are an expected side effect
@@ -357,10 +376,11 @@ static irqreturn_t dpc_handler(int irq, void *context)
return IRQ_HANDLED;
}
- dpc_process_error(err_port);
+ err_dev = dpc_process_error(err_port);
/* We configure DPC so it only triggers on ERR_FATAL */
- pcie_do_recovery(err_port, pci_channel_io_frozen, dpc_reset_link);
+ pcie_do_recovery(err_dev, pci_channel_io_frozen, dpc_reset_link);
+ pci_dev_put(err_dev);
return IRQ_HANDLED;
}
@@ -150,7 +150,7 @@ static int acpi_send_edr_status(struct pci_dev *pdev, struct pci_dev *edev,
static void edr_handle_event(acpi_handle handle, u32 event, void *data)
{
- struct pci_dev *pdev = data, *err_port;
+ struct pci_dev *pdev = data, *err_port, *err_dev;
pci_ers_result_t estate = PCI_ERS_RESULT_DISCONNECT;
u16 status;
@@ -190,7 +190,7 @@ static void edr_handle_event(acpi_handle handle, u32 event, void *data)
goto send_ost;
}
- dpc_process_error(err_port);
+ err_dev = dpc_process_error(err_port);
pci_aer_raw_clear_status(err_port);
/*
@@ -198,7 +198,7 @@ static void edr_handle_event(acpi_handle handle, u32 event, void *data)
* or ERR_NONFATAL, since the link is already down, use the FATAL
* error recovery path for both cases.
*/
- estate = pcie_do_recovery(err_port, pci_channel_io_frozen, dpc_reset_link);
+ estate = pcie_do_recovery(err_dev, pci_channel_io_frozen, dpc_reset_link);
send_ost:
@@ -216,6 +216,7 @@ static void edr_handle_event(acpi_handle handle, u32 event, void *data)
}
pci_dev_put(err_port);
+ pci_dev_put(err_dev);
}
void pci_acpi_add_edr_notifier(struct pci_dev *pdev)
The current implementation of pcie_do_recovery() assumes that the recovery process is executed on the device that detected the error. However, the DPC driver currently passes the error port that experienced the DPC event to pcie_do_recovery(). Use the Source ID register to correctly identify the device that detected the error. When passed the error device, pcie_do_recovery() will find the upstream bridge and walk the bridges potentially affected by the AER error. Subsequent patches will then be able to accurately access the AER status of the error device. No functional changes are expected. Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com> --- drivers/pci/pci.h | 2 +- drivers/pci/pcie/dpc.c | 28 ++++++++++++++++++++++++---- drivers/pci/pcie/edr.c | 7 ++++--- 3 files changed, 29 insertions(+), 8 deletions(-)