diff mbox series

[4/5] megaraid_sas: Handle missing interrupts while re-enabling IRQs

Message ID 20210317190824.3050-5-chandrakanth.patil@broadcom.com (mailing list archive)
State Changes Requested
Headers show
Series megaraid_sas: Update driver version to 07.717.01.00-rc1 | expand

Commit Message

Chandrakanth Patil March 17, 2021, 7:08 p.m. UTC
While reenabling the IRQ after irq poll there may be a small window for
the firmware to post the replies with interrupts raised. In that case,
driver will not see the interrupts which lead to IOs timeout.

This issue hits only when there is a high IOs completion on a single reply
queue, which forces the driver to switch between the interrupt and IRQ
context.

To fix this, driver will process the reply queue one more time after
enabling the IRQ.

Link: https://lore.kernel.org/linux-scsi/20201102072746.27410-1-sreekanth.reddy@broadcom.com/
Cc: Tomas Henzl <thenzl@redhat.com>
Signed-off-by: Chandrakanth Patil <chandrakanth.patil@broadcom.com>
Signed-off-by: Sumit Saxena <sumit.saxena@broadcom.com>
---
 drivers/scsi/megaraid/megaraid_sas.h        |  1 +
 drivers/scsi/megaraid/megaraid_sas_base.c   |  2 ++
 drivers/scsi/megaraid/megaraid_sas_fusion.c | 22 +++++++++++++++------
 3 files changed, 19 insertions(+), 6 deletions(-)
diff mbox series

Patch

diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h
index d7185aa21eb5..689bc519b4c5 100644
--- a/drivers/scsi/megaraid/megaraid_sas.h
+++ b/drivers/scsi/megaraid/megaraid_sas.h
@@ -2213,6 +2213,7 @@  struct megasas_irq_context {
 	struct irq_poll irqpoll;
 	bool irq_poll_scheduled;
 	bool irq_line_enable;
+	atomic_t in_use;
 };
 
 struct MR_DRV_SYSTEM_INFO {
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index f3716f7e1d10..a3584b507749 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -5630,6 +5630,7 @@  megasas_setup_irqs_ioapic(struct megasas_instance *instance)
 	pdev = instance->pdev;
 	instance->irq_context[0].instance = instance;
 	instance->irq_context[0].MSIxIndex = 0;
+	atomic_set(&instance->irq_context[0].in_use, 0);
 	snprintf(instance->irq_context->name, MEGASAS_MSIX_NAME_LEN, "%s%u",
 		"megasas", instance->host->host_no);
 	if (request_irq(pci_irq_vector(pdev, 0),
@@ -5666,6 +5667,7 @@  megasas_setup_irqs_msix(struct megasas_instance *instance, u8 is_probe)
 	for (i = 0; i < instance->msix_vectors; i++) {
 		instance->irq_context[i].instance = instance;
 		instance->irq_context[i].MSIxIndex = i;
+		atomic_set(&instance->irq_context[i].in_use, 0);
 		snprintf(instance->irq_context[i].name, MEGASAS_MSIX_NAME_LEN, "%s%u-msix%u",
 			"megasas", instance->host->host_no, i);
 		if (request_irq(pci_irq_vector(pdev, i),
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index 54f8a8073ca0..d151d2e0b1c8 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -3478,7 +3478,7 @@  complete_cmd_fusion(struct megasas_instance *instance, u32 MSIxIndex,
 	struct fusion_context *fusion;
 	struct megasas_cmd *cmd_mfi;
 	struct megasas_cmd_fusion *cmd_fusion;
-	u16 smid, num_completed;
+	u16 smid, num_completed = 0;
 	u8 reply_descript_type, *sense, status, extStatus;
 	u32 device_id, data_length;
 	union desc_value d_val;
@@ -3493,6 +3493,9 @@  complete_cmd_fusion(struct megasas_instance *instance, u32 MSIxIndex,
 	if (atomic_read(&instance->adprecovery) == MEGASAS_HW_CRITICAL_ERROR)
 		return IRQ_HANDLED;
 
+	if (!atomic_add_unless(&irq_context->in_use, 1, 1))
+		return num_completed;
+
 	desc = fusion->reply_frames_desc[MSIxIndex] +
 				fusion->last_reply_idx[MSIxIndex];
 
@@ -3503,10 +3506,10 @@  complete_cmd_fusion(struct megasas_instance *instance, u32 MSIxIndex,
 	reply_descript_type = reply_desc->ReplyFlags &
 		MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK;
 
-	if (reply_descript_type == MPI2_RPY_DESCRIPT_FLAGS_UNUSED)
+	if (reply_descript_type == MPI2_RPY_DESCRIPT_FLAGS_UNUSED) {
+		atomic_dec(&irq_context->in_use);
 		return IRQ_NONE;
-
-	num_completed = 0;
+	}
 
 	while (d_val.u.low != cpu_to_le32(UINT_MAX) &&
 	       d_val.u.high != cpu_to_le32(UINT_MAX)) {
@@ -3619,6 +3622,7 @@  complete_cmd_fusion(struct megasas_instance *instance, u32 MSIxIndex,
 					irq_context->irq_line_enable = true;
 					irq_poll_sched(&irq_context->irqpoll);
 				}
+				atomic_dec(&irq_context->in_use);
 				return num_completed;
 			}
 		}
@@ -3636,6 +3640,7 @@  complete_cmd_fusion(struct megasas_instance *instance, u32 MSIxIndex,
 				instance->reply_post_host_index_addr[0]);
 		megasas_check_and_restore_queue_depth(instance);
 	}
+	atomic_dec(&irq_context->in_use);
 	return num_completed;
 }
 
@@ -3676,6 +3681,7 @@  static void megasas_sync_irqs(unsigned long instance_addr)
 		if (irq_ctx->irq_poll_scheduled) {
 			irq_ctx->irq_poll_scheduled = false;
 			enable_irq(irq_ctx->os_irq);
+			complete_cmd_fusion(instance, irq_ctx->MSIxIndex, irq_ctx);
 		}
 	}
 }
@@ -3707,6 +3713,7 @@  int megasas_irqpoll(struct irq_poll *irqpoll, int budget)
 		irq_poll_complete(irqpoll);
 		irq_ctx->irq_poll_scheduled = false;
 		enable_irq(irq_ctx->os_irq);
+		complete_cmd_fusion(instance, irq_ctx->MSIxIndex, irq_ctx);
 	}
 
 	return num_entries;
@@ -3723,6 +3730,7 @@  megasas_complete_cmd_dpc_fusion(unsigned long instance_addr)
 {
 	struct megasas_instance *instance =
 		(struct megasas_instance *)instance_addr;
+	struct megasas_irq_context *irq_ctx;
 	u32 count, MSIxIndex;
 
 	count = instance->msix_vectors > 0 ? instance->msix_vectors : 1;
@@ -3731,8 +3739,10 @@  megasas_complete_cmd_dpc_fusion(unsigned long instance_addr)
 	if (atomic_read(&instance->adprecovery) == MEGASAS_HW_CRITICAL_ERROR)
 		return;
 
-	for (MSIxIndex = 0 ; MSIxIndex < count; MSIxIndex++)
-		complete_cmd_fusion(instance, MSIxIndex, NULL);
+	for (MSIxIndex = 0 ; MSIxIndex < count; MSIxIndex++) {
+		irq_ctx = &instance->irq_context[MSIxIndex];
+		complete_cmd_fusion(instance, MSIxIndex, irq_ctx);
+	}
 }
 
 /**