@@ -37,6 +37,8 @@
#define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc))
+#define MAX_RESET_FAILURES 4
+
static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0);
@@ -101,6 +103,8 @@ struct nvme_dev {
struct completion ioq_wait;
bool queues_froze;
+ int reset_failures;
+
/* shadow doorbell buffer support: */
u32 *dbbuf_dbs;
dma_addr_t dbbuf_dbs_dma_addr;
@@ -2307,9 +2311,23 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
kfree(dev);
}
-static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
+static void nvme_reset_failure(struct nvme_dev *dev, int status)
{
- dev_warn(dev->ctrl.device, "Removing after probe failure status: %d\n", status);
+ dev->reset_failures++;
+ dev_warn(dev->ctrl.device, "Reset failure status: %d, failures:%d\n",
+ status, dev->reset_failures);
+
+ /* IO and Interrupted Call may indicate a retryable error */
+ switch (status) {
+ case -EIO:
+ case -EINTR:
+ if (dev->reset_failures < MAX_RESET_FAILURES &&
+ !nvme_reset_ctrl(&dev->ctrl))
+ return;
+ break;
+ default:
+ break;
+ }
nvme_get_ctrl(&dev->ctrl);
nvme_dev_disable(dev, false);
@@ -2410,14 +2428,16 @@ static void nvme_reset_work(struct work_struct *work)
if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) {
dev_warn(dev->ctrl.device,
"failed to mark controller state %d\n", new_state);
+ result = -ENODEV;
goto out;
}
+ dev->reset_failures = 0;
nvme_start_ctrl(&dev->ctrl);
return;
out:
- nvme_remove_dead_ctrl(dev, result);
+ nvme_reset_failure(dev, result);
}
static void nvme_remove_dead_ctrl_work(struct work_struct *work)
If the reset failed due to a non-fatal error, this patch will attempt to
reset the controller again, up to a maximum of 4 attempts.

Since the failed-reset path has changed purpose, this patch gives it a
more appropriate name and warning message for the reset failure.

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/nvme/host/pci.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)