diff mbox

[3/7] megaraid_sas: Do not fire DCMDs during PCI shutdown/detach

Message ID 1476699850-25083-4-git-send-email-sumit.saxena@broadcom.com (mailing list archive)
State Changes Requested, archived
Headers show

Commit Message

Sumit Saxena Oct. 17, 2016, 10:24 a.m. UTC
This patch addresses the issue of driver firing DCMDs in PCI
shutdown/detach path irrespective of firmware state.
Driver will check whether the firmware is in an operational state or not
before firing DCMDs. If the firmware is in an unrecoverable
state or does not become operational within the specified time, the driver will
skip firing DCMDs.

Signed-off-by: Sumit Saxena <sumit.saxena@broadcom.com>
---
 drivers/scsi/megaraid/megaraid_sas_base.c   | 46 +++++++++++++++++++++++++++++
 drivers/scsi/megaraid/megaraid_sas_fusion.c |  9 ++++--
 2 files changed, 52 insertions(+), 3 deletions(-)

Comments

Hannes Reinecke Oct. 17, 2016, 11:31 a.m. UTC | #1
On 10/17/2016 12:24 PM, Sumit Saxena wrote:
> This patch addresses the issue of driver firing DCMDs in PCI
> shutdown/detach path irrespective of firmware state.
> Driver will check for whether firmware is operational state or not
> before firing DCMDs. If firmware is in unrecoverbale
> state or does not become operational within specfied time, driver will
> skip firing DCMDs.
> 
> Signed-off-by: Sumit Saxena <sumit.saxena@broadcom.com>
> ---
>  drivers/scsi/megaraid/megaraid_sas_base.c   | 46 +++++++++++++++++++++++++++++
>  drivers/scsi/megaraid/megaraid_sas_fusion.c |  9 ++++--
>  2 files changed, 52 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
> index 9ff57de..092387f 100644
> --- a/drivers/scsi/megaraid/megaraid_sas_base.c
> +++ b/drivers/scsi/megaraid/megaraid_sas_base.c
> @@ -6248,6 +6248,32 @@ fail_reenable_msix:
>  #define megasas_resume	NULL
>  #endif
>  
> +static inline int
> +megasas_wait_for_adapter_operational(struct megasas_instance *instance)
> +{
> +	int wait_time = MEGASAS_RESET_WAIT_TIME * 2;
> +	int i;
> +
> +	for (i = 0; i < wait_time; i++) {
> +		if (atomic_read(&instance->adprecovery)
> +			== MEGASAS_HBA_OPERATIONAL)
> +			break;
> +
> +		if (!(i % MEGASAS_RESET_NOTICE_INTERVAL))
> +			dev_notice(&instance->pdev->dev, "waiting for controller reset to finish\n");
> +
> +		msleep(1000);
> +	}
> +
> +	if (atomic_read(&instance->adprecovery) != MEGASAS_HBA_OPERATIONAL) {
> +		dev_info(&instance->pdev->dev, "%s timed out while waiting for HBA to recover.\n",
> +			__func__);
> +		return 1;
> +	}
> +
> +	return 0;
> +}
> +
>  /**
>   * megasas_detach_one -	PCI hot"un"plug entry point
>   * @pdev:		PCI device structure
> @@ -6272,9 +6298,18 @@ static void megasas_detach_one(struct pci_dev *pdev)
>  	if (instance->fw_crash_state != UNAVAILABLE)
>  		megasas_free_host_crash_buffer(instance);
>  	scsi_remove_host(instance->host);
> +
> +	msleep(1000);
> +	if ((atomic_read(&instance->adprecovery) == MEGASAS_HW_CRITICAL_ERROR)
> +		|| ((atomic_read(&instance->adprecovery)
> +			!= MEGASAS_HBA_OPERATIONAL) &&
> +		megasas_wait_for_adapter_operational(instance)))
> +		goto skip_firing_dcmds;
> +
>  	megasas_flush_cache(instance);
>  	megasas_shutdown_controller(instance, MR_DCMD_CTRL_SHUTDOWN);
>  
> +skip_firing_dcmds:
>  	/* cancel the delayed work if this work still in queue*/
>  	if (instance->ev != NULL) {
>  		struct megasas_aen_event *ev = instance->ev;
Why not move the 'msleep' and 'atomic_read' into the call to
megasas_wait_for_adapter_operational?
Plus I'm not sure if the unconditional 'msleep' is a good idea here;
maybe one should read 'adprecovery' first and _then_ call msleep if
required?

Cheers,

Hannes
Sumit Saxena Oct. 17, 2016, 12:19 p.m. UTC | #2
>-----Original Message-----
>From: Hannes Reinecke [mailto:hare@suse.de]
>Sent: Monday, October 17, 2016 5:01 PM
>To: Sumit Saxena; linux-scsi@vger.kernel.org
>Cc: martin.petersen@oracle.com; thenzl@redhat.com;
jejb@linux.vnet.ibm.com;
>kashyap.desai@broadcom.com
>Subject: Re: [PATCH 3/7] megaraid_sas: Do not fire DCMDs during PCI
>shutdown/detach
>
>On 10/17/2016 12:24 PM, Sumit Saxena wrote:
>> This patch addresses the issue of driver firing DCMDs in PCI
>> shutdown/detach path irrespective of firmware state.
>> Driver will check for whether firmware is operational state or not
>> before firing DCMDs. If firmware is in unrecoverbale state or does not
>> become operational within specfied time, driver will skip firing
>> DCMDs.
>>
>> Signed-off-by: Sumit Saxena <sumit.saxena@broadcom.com>
>> ---
>>  drivers/scsi/megaraid/megaraid_sas_base.c   | 46
>+++++++++++++++++++++++++++++
>>  drivers/scsi/megaraid/megaraid_sas_fusion.c |  9 ++++--
>>  2 files changed, 52 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c
>> b/drivers/scsi/megaraid/megaraid_sas_base.c
>> index 9ff57de..092387f 100644
>> --- a/drivers/scsi/megaraid/megaraid_sas_base.c
>> +++ b/drivers/scsi/megaraid/megaraid_sas_base.c
>> @@ -6248,6 +6248,32 @@ fail_reenable_msix:
>>  #define megasas_resume	NULL
>>  #endif
>>
>> +static inline int
>> +megasas_wait_for_adapter_operational(struct megasas_instance
>> +*instance) {
>> +	int wait_time = MEGASAS_RESET_WAIT_TIME * 2;
>> +	int i;
>> +
>> +	for (i = 0; i < wait_time; i++) {
>> +		if (atomic_read(&instance->adprecovery)
>> +			== MEGASAS_HBA_OPERATIONAL)
>> +			break;
>> +
>> +		if (!(i % MEGASAS_RESET_NOTICE_INTERVAL))
>> +			dev_notice(&instance->pdev->dev, "waiting for
>controller reset to
>> +finish\n");
>> +
>> +		msleep(1000);
>> +	}
>> +
>> +	if (atomic_read(&instance->adprecovery) !=
>MEGASAS_HBA_OPERATIONAL) {
>> +		dev_info(&instance->pdev->dev, "%s timed out while waiting
for
>HBA to recover.\n",
>> +			__func__);
>> +		return 1;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>  /**
>>   * megasas_detach_one -	PCI hot"un"plug entry point
>>   * @pdev:		PCI device structure
>> @@ -6272,9 +6298,18 @@ static void megasas_detach_one(struct pci_dev
>*pdev)
>>  	if (instance->fw_crash_state != UNAVAILABLE)
>>  		megasas_free_host_crash_buffer(instance);
>>  	scsi_remove_host(instance->host);
>> +
>> +	msleep(1000);
>> +	if ((atomic_read(&instance->adprecovery) ==
>MEGASAS_HW_CRITICAL_ERROR)
>> +		|| ((atomic_read(&instance->adprecovery)
>> +			!= MEGASAS_HBA_OPERATIONAL) &&
>> +		megasas_wait_for_adapter_operational(instance)))
>> +		goto skip_firing_dcmds;
>> +
>>  	megasas_flush_cache(instance);
>>  	megasas_shutdown_controller(instance,
>MR_DCMD_CTRL_SHUTDOWN);
>>
>> +skip_firing_dcmds:
>>  	/* cancel the delayed work if this work still in queue*/
>>  	if (instance->ev != NULL) {
>>  		struct megasas_aen_event *ev = instance->ev;
>Why not move the 'msleep' and 'atomic_read' into the call to
>megasas_wait_for_adapter_operational?
I will rectify this in next version of this patch.

>Plus I'm not sure if the unconditional 'msleep' is a good idea here;
maybe one
>should read 'adprecovery' first and _then_ call msleep if required?
Agreed.. I will cleanup this and resend the patch.

Thanks,
Sumit
>
>Cheers,
>
>Hannes
>--
>Dr. Hannes Reinecke		   Teamlead Storage & Networking
>hare@suse.de			               +49 911 74053 688
>SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
>GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton HRB
21284 (AG
>Nürnberg)
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 9ff57de..092387f 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -6248,6 +6248,32 @@  fail_reenable_msix:
 #define megasas_resume	NULL
 #endif
 
+static inline int
+megasas_wait_for_adapter_operational(struct megasas_instance *instance)
+{
+	int wait_time = MEGASAS_RESET_WAIT_TIME * 2;
+	int i;
+
+	for (i = 0; i < wait_time; i++) {
+		if (atomic_read(&instance->adprecovery)
+			== MEGASAS_HBA_OPERATIONAL)
+			break;
+
+		if (!(i % MEGASAS_RESET_NOTICE_INTERVAL))
+			dev_notice(&instance->pdev->dev, "waiting for controller reset to finish\n");
+
+		msleep(1000);
+	}
+
+	if (atomic_read(&instance->adprecovery) != MEGASAS_HBA_OPERATIONAL) {
+		dev_info(&instance->pdev->dev, "%s timed out while waiting for HBA to recover.\n",
+			__func__);
+		return 1;
+	}
+
+	return 0;
+}
+
 /**
  * megasas_detach_one -	PCI hot"un"plug entry point
  * @pdev:		PCI device structure
@@ -6272,9 +6298,18 @@  static void megasas_detach_one(struct pci_dev *pdev)
 	if (instance->fw_crash_state != UNAVAILABLE)
 		megasas_free_host_crash_buffer(instance);
 	scsi_remove_host(instance->host);
+
+	msleep(1000);
+	if ((atomic_read(&instance->adprecovery) == MEGASAS_HW_CRITICAL_ERROR)
+		|| ((atomic_read(&instance->adprecovery)
+			!= MEGASAS_HBA_OPERATIONAL) &&
+		megasas_wait_for_adapter_operational(instance)))
+		goto skip_firing_dcmds;
+
 	megasas_flush_cache(instance);
 	megasas_shutdown_controller(instance, MR_DCMD_CTRL_SHUTDOWN);
 
+skip_firing_dcmds:
 	/* cancel the delayed work if this work still in queue*/
 	if (instance->ev != NULL) {
 		struct megasas_aen_event *ev = instance->ev;
@@ -6388,8 +6423,19 @@  static void megasas_shutdown(struct pci_dev *pdev)
 	struct megasas_instance *instance = pci_get_drvdata(pdev);
 
 	instance->unload = 1;
+
+	msleep(1000);
+
+	if ((atomic_read(&instance->adprecovery) == MEGASAS_HW_CRITICAL_ERROR)
+		|| ((atomic_read(&instance->adprecovery)
+			!= MEGASAS_HBA_OPERATIONAL)
+		&& megasas_wait_for_adapter_operational(instance)))
+		goto skip_firing_dcmds;
+
 	megasas_flush_cache(instance);
 	megasas_shutdown_controller(instance, MR_DCMD_CTRL_SHUTDOWN);
+
+skip_firing_dcmds:
 	instance->instancet->disable_intr(instance);
 	megasas_destroy_irqs(instance);
 
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index 61be7ed..2159f6a 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -2463,12 +2463,15 @@  irqreturn_t megasas_isr_fusion(int irq, void *devp)
 			/* Start collecting crash, if DMA bit is done */
 			if ((fw_state == MFI_STATE_FAULT) && dma_state)
 				schedule_work(&instance->crash_init);
-			else if (fw_state == MFI_STATE_FAULT)
-				schedule_work(&instance->work_init);
+			else if (fw_state == MFI_STATE_FAULT) {
+				if (instance->unload == 0)
+					schedule_work(&instance->work_init);
+			}
 		} else if (fw_state == MFI_STATE_FAULT) {
 			dev_warn(&instance->pdev->dev, "Iop2SysDoorbellInt"
 			       "for scsi%d\n", instance->host->host_no);
-			schedule_work(&instance->work_init);
+			if (instance->unload == 0)
+				schedule_work(&instance->work_init);
 		}
 	}