diff mbox

[2/4] PCI/DPC/AER: Address Concurrency between AER and DPC

Message ID 1514370022-4431-3-git-send-email-poza@codeaurora.org (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Oza Pawandeep Dec. 27, 2017, 10:20 a.m. UTC
This patch addresses the race condition between AER and DPC for recovery.

Current DPC driver does not do recovery, e.g. calling end-point's driver's
callbacks, which sanitize the device.
DPC driver implements link_reset callback, and calls pci_do_recovery.

Signed-off-by: Oza Pawandeep <poza@codeaurora.org>

Comments

kernel test robot Dec. 28, 2017, 4:27 p.m. UTC | #1
Hi Oza,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v4.15-rc5]
[cannot apply to pci/next pm/linux-next next-20171222]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Oza-Pawandeep/Address-error-and-recovery-for-AER-and-DPC/20171228-222058
reproduce:
        # apt-get install sparse
        make ARCH=x86_64 allmodconfig
        make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)


Please review and possibly fold the followup patch.

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot Dec. 28, 2017, 5:37 p.m. UTC | #2
Hi Oza,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.15-rc5]
[cannot apply to pci/next pm/linux-next next-20171222]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Oza-Pawandeep/Address-error-and-recovery-for-AER-and-DPC/20171228-222058
config: arm64-defconfig (attached as .config)
compiler: aarch64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=arm64 

All errors (new ones prefixed by >>):

   drivers/pci/pcie/pcie-err.o: In function `pci_reset_link':
>> pcie-err.c:(.text+0x3f8): undefined reference to `pci_find_dpc_service'
   pcie-err.c:(.text+0x3f8): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `pci_find_dpc_service'

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot Dec. 28, 2017, 6:07 p.m. UTC | #3
Hi Oza,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.15-rc5]
[cannot apply to pci/next pm/linux-next next-20171222]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Oza-Pawandeep/Address-error-and-recovery-for-AER-and-DPC/20171228-222058
config: x86_64-randconfig-r0-12282251 (attached as .config)
compiler: gcc-6 (Debian 6.4.0-9) 6.4.0 20171026
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   drivers/pci/pcie/pcie-err.o: In function `pci_reset_link':
>> drivers/pci/pcie/pcie-err.c:195: undefined reference to `pci_find_dpc_service'

vim +195 drivers/pci/pcie/pcie-err.c

   178	
   179	pci_ers_result_t pci_reset_link(struct pci_dev *dev, int severity)
   180	{
   181		struct pci_dev *udev;
   182		pci_ers_result_t status;
   183		struct pcie_port_service_driver *driver;
   184	
   185		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
   186			/* Reset this port for all subordinates */
   187			udev = dev;
   188		} else {
   189			/* Reset the upstream component (likely downstream port) */
   190			udev = dev->bus->self;
   191		}
   192	
   193		/* Use the aer driver of the component firstly */
   194		if (severity == PCI_ERR_DPC_FATAL)
 > 195			driver = pci_find_dpc_service(udev);
   196		else
   197			driver = pci_find_aer_service(udev);
   198	
   199		if (driver && driver->reset_link) {
   200			status = driver->reset_link(udev);
   201		} else if (udev->has_secondary_link) {
   202			status = pci_default_reset_link(udev);
   203		} else {
   204			dev_printk(KERN_DEBUG, &dev->dev,
   205				"no link-reset support at upstream device %s\n",
   206				pci_name(udev));
   207			return PCI_ERS_RESULT_DISCONNECT;
   208		}
   209	
   210		if (status != PCI_ERS_RESULT_RECOVERED) {
   211			dev_printk(KERN_DEBUG, &dev->dev,
   212				"link reset at upstream device %s failed\n",
   213				pci_name(udev));
   214			return PCI_ERS_RESULT_DISCONNECT;
   215		}
   216	
   217		return status;
   218	}
   219	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox

Patch

diff --git a/drivers/pci/pcie/pcie-dpc.c b/drivers/pci/pcie/pcie-dpc.c
index 2d976a6..e7ced58 100644
--- a/drivers/pci/pcie/pcie-dpc.c
+++ b/drivers/pci/pcie/pcie-dpc.c
@@ -15,6 +15,9 @@ 
 #include <linux/pci.h>
 #include <linux/pcieport_if.h>
 #include "../pci.h"
+#include "portdrv.h"
+
+static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev);
 
 struct rp_pio_header_log_regs {
 	u32 dw0;
@@ -67,6 +70,60 @@  struct dpc_dev {
 	"Memory Request Completion Timeout",		 /* Bit Position 18 */
 };
 
+static int find_dpc_dev_iter(struct device *device, void *data)
+{
+	struct pcie_port_service_driver *service_driver;
+	struct device **dev;
+
+	dev = (struct device **) data;
+
+	if (device->bus == &pcie_port_bus_type && device->driver) {
+		service_driver = to_service_driver(device->driver);
+		if (service_driver->service == PCIE_PORT_SERVICE_DPC) {
+			*dev = device;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+struct device *pci_find_dpc_dev(struct pci_dev *pdev)
+{
+	struct device *dev = NULL;
+
+	device_for_each_child(&pdev->dev, &dev, find_dpc_dev_iter);
+
+	return dev;
+}
+
+static int find_dpc_service_iter(struct device *device, void *data)
+{
+	struct pcie_port_service_driver *service_driver, **drv;
+
+	drv = (struct pcie_port_service_driver **) data;
+
+	if (device->bus == &pcie_port_bus_type && device->driver) {
+		service_driver = to_service_driver(device->driver);
+		if (service_driver->service == PCIE_PORT_SERVICE_DPC) {
+			*drv = service_driver;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+struct pcie_port_service_driver *pci_find_dpc_service(struct pci_dev *dev)
+{
+	struct pcie_port_service_driver *drv = NULL;
+
+	device_for_each_child(&dev->dev, &drv, find_dpc_service_iter);
+
+	return drv;
+}
+EXPORT_SYMBOL(pci_find_dpc_service);
+
 static int dpc_wait_rp_inactive(struct dpc_dev *dpc)
 {
 	unsigned long timeout = jiffies + HZ;
@@ -104,11 +161,23 @@  static void dpc_wait_link_inactive(struct dpc_dev *dpc)
 		dev_warn(dev, "Link state not disabled for DPC event\n");
 }
 
-static void interrupt_event_handler(struct work_struct *work)
+/**
+ * dpc_reset_link - reset link DPC  routine
+ * @dev: pointer to Root Port's pci_dev data structure
+ *
+ * Invoked by Port Bus driver when performing link reset at Root Port.
+ */
+static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
 {
-	struct dpc_dev *dpc = container_of(work, struct dpc_dev, work);
-	struct pci_dev *dev, *temp, *pdev = dpc->dev->port;
 	struct pci_bus *parent = pdev->subordinate;
+	struct pci_dev *dev, *temp;
+	struct dpc_dev *dpc;
+	struct pcie_device *pciedev;
+	struct device *devdpc;
+
+	devdpc = pci_find_dpc_dev(pdev);
+	pciedev = to_pcie_device(devdpc);
+	dpc = get_service_data(pciedev);
 
 	pci_lock_rescan_remove();
 	list_for_each_entry_safe_reverse(dev, temp, &parent->devices,
@@ -125,7 +194,7 @@  static void interrupt_event_handler(struct work_struct *work)
 
 	dpc_wait_link_inactive(dpc);
 	if (dpc->rp && dpc_wait_rp_inactive(dpc))
-		return;
+		return PCI_ERS_RESULT_DISCONNECT;
 	if (dpc->rp && dpc->rp_pio_status) {
 		pci_write_config_dword(pdev,
 				      dpc->cap_pos + PCI_EXP_DPC_RP_PIO_STATUS,
@@ -135,6 +204,17 @@  static void interrupt_event_handler(struct work_struct *work)
 
 	pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS,
 		PCI_EXP_DPC_STATUS_TRIGGER | PCI_EXP_DPC_STATUS_INTERRUPT);
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+static void interrupt_event_handler(struct work_struct *work)
+{
+	struct dpc_dev *dpc = container_of(work, struct dpc_dev, work);
+	struct pci_dev *pdev = dpc->dev->port;
+
+	/* From DPC point of view error is always FATAL. */
+	pci_do_recovery(pdev, PCI_ERR_DPC_FATAL);
 }
 
 static void dpc_rp_pio_print_tlp_header(struct device *dev,
@@ -339,6 +419,7 @@  static void dpc_remove(struct pcie_device *dev)
 	.service	= PCIE_PORT_SERVICE_DPC,
 	.probe		= dpc_probe,
 	.remove		= dpc_remove,
+	.reset_link     = dpc_reset_link,
 };
 
 static int __init dpc_service_init(void)
diff --git a/drivers/pci/pcie/pcie-err.c b/drivers/pci/pcie/pcie-err.c
index d59866c..8bac584 100644
--- a/drivers/pci/pcie/pcie-err.c
+++ b/drivers/pci/pcie/pcie-err.c
@@ -176,7 +176,7 @@  static pci_ers_result_t pci_default_reset_link(struct pci_dev *dev)
 	return PCI_ERS_RESULT_RECOVERED;
 }
 
-pci_ers_result_t pci_reset_link(struct pci_dev *dev)
+pci_ers_result_t pci_reset_link(struct pci_dev *dev, int severity)
 {
 	struct pci_dev *udev;
 	pci_ers_result_t status;
@@ -191,7 +191,10 @@  pci_ers_result_t pci_reset_link(struct pci_dev *dev)
 	}
 
 	/* Use the aer driver of the component firstly */
-	driver = pci_find_aer_service(udev);
+	if (severity == PCI_ERR_DPC_FATAL)
+		driver = pci_find_dpc_service(udev);
+	else
+		driver = pci_find_aer_service(udev);
 
 	if (driver && driver->reset_link) {
 		status = driver->reset_link(udev);
@@ -280,7 +283,8 @@  void pci_do_recovery(struct pci_dev *dev, int severity)
 
 	mutex_lock(&pci_err_recovery_lock);
 
-	if (severity == PCI_ERR_AER_FATAL)
+	if ((severity == PCI_ERR_AER_FATAL) ||
+	    (severity == PCI_ERR_DPC_FATAL))
 		state = pci_channel_io_frozen;
 	else
 		state = pci_channel_io_normal;
@@ -290,8 +294,9 @@  void pci_do_recovery(struct pci_dev *dev, int severity)
 			"error_detected",
 			pci_report_error_detected);
 
-	if (severity == PCI_ERR_AER_FATAL) {
-		result = pci_reset_link(dev);
+	if ((severity == PCI_ERR_AER_FATAL) ||
+	    (severity == PCI_ERR_DPC_FATAL)) {
+		result = pci_reset_link(dev, severity);
 		if (result != PCI_ERS_RESULT_RECOVERED)
 			goto failed;
 	}
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index 4f1992d..b013e24 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -80,4 +80,5 @@  static inline void pcie_port_platform_notify(struct pci_dev *port, int *mask){}
 #endif /* !CONFIG_ACPI */
 
 struct pcie_port_service_driver *pci_find_aer_service(struct pci_dev *dev);
+struct pcie_port_service_driver *pci_find_dpc_service(struct pci_dev *dev);
 #endif /* _PORTDRV_H_ */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 083408e..123ee15 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2005,6 +2005,7 @@  static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int res
 #define PCI_ERR_AER_NONFATAL		0
 #define PCI_ERR_AER_FATAL		1
 #define PCI_ERR_AER_CORRECTABLE		2
+#define PCI_ERR_DPC_FATAL		4
 
 pci_ers_result_t pci_broadcast_error_message(struct pci_dev *dev,
 					enum pci_channel_state state,
@@ -2014,7 +2015,7 @@  pci_ers_result_t pci_broadcast_error_message(struct pci_dev *dev,
 int pci_report_slot_reset(struct pci_dev *dev, void *data);
 int pci_report_resume(struct pci_dev *dev, void *data);
 int pci_report_error_detected(struct pci_dev *dev, void *data);
-pci_ers_result_t pci_reset_link(struct pci_dev *dev);
+pci_ers_result_t pci_reset_link(struct pci_dev *dev, int severity);
 pci_ers_result_t pci_merge_result(enum pci_ers_result orig,
 				enum pci_ers_result new);
 void pci_do_recovery(struct pci_dev *dev, int severity);