diff mbox series

[v7,7/8] scsi: ufs: Move dumps in IRQ handler to error handler

Message ID 1595912460-8860-8-git-send-email-cang@codeaurora.org (mailing list archive)
State Superseded
Headers show
Series Fix up and simplify error recovery mechanism | expand

Commit Message

Can Guo July 28, 2020, 5 a.m. UTC
Sometime dumps in IRQ handler are heavy enough to cause system stability
issues, move them to error handler.

Signed-off-by: Can Guo <cang@codeaurora.org>
---
 drivers/scsi/ufs/ufshcd.c | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

Comments

Asutosh Das (asd) July 28, 2020, 6:06 p.m. UTC | #1
On 7/27/2020 10:00 PM, Can Guo wrote:
> Sometime dumps in IRQ handler are heavy enough to cause system stability
> issues, move them to error handler.
> 
> Signed-off-by: Can Guo <cang@codeaurora.org>
> ---
>   drivers/scsi/ufs/ufshcd.c | 31 +++++++++++++++----------------
>   1 file changed, 15 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index c480823..b2bafa3 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -5682,6 +5682,21 @@ static void ufshcd_err_handler(struct work_struct *work)
>   				    UFSHCD_UIC_DL_TCx_REPLAY_ERROR))))
>   		needs_reset = true;
>   
> +	if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR |
> +			      UFSHCD_UIC_HIBERN8_MASK)) {
> +		bool pr_prdt = !!(hba->saved_err & SYSTEM_BUS_FATAL_ERROR);
> +
> +		dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x\n",
> +				__func__, hba->saved_err, hba->saved_uic_err);
> +		spin_unlock_irqrestore(hba->host->host_lock, flags);
> +		ufshcd_print_host_state(hba);
> +		ufshcd_print_pwr_info(hba);
> +		ufshcd_print_host_regs(hba);
> +		ufshcd_print_tmrs(hba, hba->outstanding_tasks);
> +		ufshcd_print_trs(hba, hba->outstanding_reqs, pr_prdt);
> +		spin_lock_irqsave(hba->host->host_lock, flags);
> +	}
> +
>   	/*
>   	 * if host reset is required then skip clearing the pending
>   	 * transfers forcefully because they will get cleared during
> @@ -5900,22 +5915,6 @@ static irqreturn_t ufshcd_check_errors(struct ufs_hba *hba)
>   
>   		/* block commands from scsi mid-layer */
>   		ufshcd_scsi_block_requests(hba);
> -
> -		/* dump controller state before resetting */
> -		if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR)) {
> -			bool pr_prdt = !!(hba->saved_err &
> -					SYSTEM_BUS_FATAL_ERROR);
> -
> -			dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x\n",
> -					__func__, hba->saved_err,
> -					hba->saved_uic_err);
> -
> -			ufshcd_print_host_regs(hba);
> -			ufshcd_print_pwr_info(hba);
How about keep the above prints and move the tmrs and trs to eh?
Sometimes in system instability, the eh may not get a chance to run 
even. Still the above prints would provide some clues.
> -			ufshcd_print_tmrs(hba, hba->outstanding_tasks);
> -			ufshcd_print_trs(hba, hba->outstanding_reqs,
> -					pr_prdt);
> -		}
>   		ufshcd_schedule_eh_work(hba);
>   		retval |= IRQ_HANDLED;
>   	}
>
Can Guo July 29, 2020, 1:02 p.m. UTC | #2
Hi Asutosh,

On 2020-07-29 02:06, Asutosh Das (asd) wrote:
> On 7/27/2020 10:00 PM, Can Guo wrote:
>> Sometime dumps in IRQ handler are heavy enough to cause system 
>> stability
>> issues, move them to error handler.
>> 
>> Signed-off-by: Can Guo <cang@codeaurora.org>
>> ---
>>   drivers/scsi/ufs/ufshcd.c | 31 +++++++++++++++----------------
>>   1 file changed, 15 insertions(+), 16 deletions(-)
>> 
>> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
>> index c480823..b2bafa3 100644
>> --- a/drivers/scsi/ufs/ufshcd.c
>> +++ b/drivers/scsi/ufs/ufshcd.c
>> @@ -5682,6 +5682,21 @@ static void ufshcd_err_handler(struct 
>> work_struct *work)
>>   				    UFSHCD_UIC_DL_TCx_REPLAY_ERROR))))
>>   		needs_reset = true;
>>   +	if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR |
>> +			      UFSHCD_UIC_HIBERN8_MASK)) {
>> +		bool pr_prdt = !!(hba->saved_err & SYSTEM_BUS_FATAL_ERROR);
>> +
>> +		dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x\n",
>> +				__func__, hba->saved_err, hba->saved_uic_err);
>> +		spin_unlock_irqrestore(hba->host->host_lock, flags);
>> +		ufshcd_print_host_state(hba);
>> +		ufshcd_print_pwr_info(hba);
>> +		ufshcd_print_host_regs(hba);
>> +		ufshcd_print_tmrs(hba, hba->outstanding_tasks);
>> +		ufshcd_print_trs(hba, hba->outstanding_reqs, pr_prdt);
>> +		spin_lock_irqsave(hba->host->host_lock, flags);
>> +	}
>> +
>>   	/*
>>   	 * if host reset is required then skip clearing the pending
>>   	 * transfers forcefully because they will get cleared during
>> @@ -5900,22 +5915,6 @@ static irqreturn_t ufshcd_check_errors(struct 
>> ufs_hba *hba)
>>     		/* block commands from scsi mid-layer */
>>   		ufshcd_scsi_block_requests(hba);
>> -
>> -		/* dump controller state before resetting */
>> -		if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR)) {
>> -			bool pr_prdt = !!(hba->saved_err &
>> -					SYSTEM_BUS_FATAL_ERROR);
>> -
>> -			dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x\n",
>> -					__func__, hba->saved_err,
>> -					hba->saved_uic_err);
>> -
>> -			ufshcd_print_host_regs(hba);
>> -			ufshcd_print_pwr_info(hba);
> How about keep the above prints and move the tmrs and trs to eh?
> Sometimes in system instability, the eh may not get a chance to run
> even. Still the above prints would provide some clues.

Here is the IRQ handler, ufshcd_print_host_regs() is sometime heavy
enough to cause stability issues during my fault injection test, since
it prints host regs, reg's history, crypto debug infos plus prints
from vops_dump.

How about just printing host regs and reg history here? Most time, these
infos are enough.

Thanks,

Can Guo.

>> -			ufshcd_print_tmrs(hba, hba->outstanding_tasks);
>> -			ufshcd_print_trs(hba, hba->outstanding_reqs,
>> -					pr_prdt);
>> -		}
>>   		ufshcd_schedule_eh_work(hba);
>>   		retval |= IRQ_HANDLED;
>>   	}
>>
Asutosh Das (asd) July 29, 2020, 10:45 p.m. UTC | #3
On 7/29/2020 6:02 AM, Can Guo wrote:
> Hi Asutosh,
> 
> On 2020-07-29 02:06, Asutosh Das (asd) wrote:
>> On 7/27/2020 10:00 PM, Can Guo wrote:
>>> Sometime dumps in IRQ handler are heavy enough to cause system stability
>>> issues, move them to error handler.
>>>
>>> Signed-off-by: Can Guo <cang@codeaurora.org>
>>> ---
>>>   drivers/scsi/ufs/ufshcd.c | 31 +++++++++++++++----------------
>>>   1 file changed, 15 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
>>> index c480823..b2bafa3 100644
>>> --- a/drivers/scsi/ufs/ufshcd.c
>>> +++ b/drivers/scsi/ufs/ufshcd.c
>>> @@ -5682,6 +5682,21 @@ static void ufshcd_err_handler(struct 
>>> work_struct *work)
>>>                       UFSHCD_UIC_DL_TCx_REPLAY_ERROR))))
>>>           needs_reset = true;
>>>   +    if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR |
>>> +                  UFSHCD_UIC_HIBERN8_MASK)) {
>>> +        bool pr_prdt = !!(hba->saved_err & SYSTEM_BUS_FATAL_ERROR);
>>> +
>>> +        dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x\n",
>>> +                __func__, hba->saved_err, hba->saved_uic_err);
>>> +        spin_unlock_irqrestore(hba->host->host_lock, flags);
>>> +        ufshcd_print_host_state(hba);
>>> +        ufshcd_print_pwr_info(hba);
>>> +        ufshcd_print_host_regs(hba);
>>> +        ufshcd_print_tmrs(hba, hba->outstanding_tasks);
>>> +        ufshcd_print_trs(hba, hba->outstanding_reqs, pr_prdt);
>>> +        spin_lock_irqsave(hba->host->host_lock, flags);
>>> +    }
>>> +
>>>       /*
>>>        * if host reset is required then skip clearing the pending
>>>        * transfers forcefully because they will get cleared during
>>> @@ -5900,22 +5915,6 @@ static irqreturn_t ufshcd_check_errors(struct 
>>> ufs_hba *hba)
>>>             /* block commands from scsi mid-layer */
>>>           ufshcd_scsi_block_requests(hba);
>>> -
>>> -        /* dump controller state before resetting */
>>> -        if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR)) {
>>> -            bool pr_prdt = !!(hba->saved_err &
>>> -                    SYSTEM_BUS_FATAL_ERROR);
>>> -
>>> -            dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 
>>> 0x%x\n",
>>> -                    __func__, hba->saved_err,
>>> -                    hba->saved_uic_err);
>>> -
>>> -            ufshcd_print_host_regs(hba);
>>> -            ufshcd_print_pwr_info(hba);
>> How about keep the above prints and move the tmrs and trs to eh?
>> Sometimes in system instability, the eh may not get a chance to run
>> even. Still the above prints would provide some clues.
> 
> Here is the IRQ handler, ufshcd_print_host_regs() is sometime heavy
> enough to cause stability issues during my fault injection test, since
> it prints host regs, reg's history, crypto debug infos plus prints
> from vops_dump.
> 
> How about just printing host regs and reg history here? Most time, these
> infos are enough.
> 
That'd work too.

> Thanks,
> 
> Can Guo.
> 
>>> -            ufshcd_print_tmrs(hba, hba->outstanding_tasks);
>>> -            ufshcd_print_trs(hba, hba->outstanding_reqs,
>>> -                    pr_prdt);
>>> -        }
>>>           ufshcd_schedule_eh_work(hba);
>>>           retval |= IRQ_HANDLED;
>>>       }
>>>
diff mbox series

Patch

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index c480823..b2bafa3 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -5682,6 +5682,21 @@  static void ufshcd_err_handler(struct work_struct *work)
 				    UFSHCD_UIC_DL_TCx_REPLAY_ERROR))))
 		needs_reset = true;
 
+	if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR |
+			      UFSHCD_UIC_HIBERN8_MASK)) {
+		bool pr_prdt = !!(hba->saved_err & SYSTEM_BUS_FATAL_ERROR);
+
+		dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x\n",
+				__func__, hba->saved_err, hba->saved_uic_err);
+		spin_unlock_irqrestore(hba->host->host_lock, flags);
+		ufshcd_print_host_state(hba);
+		ufshcd_print_pwr_info(hba);
+		ufshcd_print_host_regs(hba);
+		ufshcd_print_tmrs(hba, hba->outstanding_tasks);
+		ufshcd_print_trs(hba, hba->outstanding_reqs, pr_prdt);
+		spin_lock_irqsave(hba->host->host_lock, flags);
+	}
+
 	/*
 	 * if host reset is required then skip clearing the pending
 	 * transfers forcefully because they will get cleared during
@@ -5900,22 +5915,6 @@  static irqreturn_t ufshcd_check_errors(struct ufs_hba *hba)
 
 		/* block commands from scsi mid-layer */
 		ufshcd_scsi_block_requests(hba);
-
-		/* dump controller state before resetting */
-		if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR)) {
-			bool pr_prdt = !!(hba->saved_err &
-					SYSTEM_BUS_FATAL_ERROR);
-
-			dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x\n",
-					__func__, hba->saved_err,
-					hba->saved_uic_err);
-
-			ufshcd_print_host_regs(hba);
-			ufshcd_print_pwr_info(hba);
-			ufshcd_print_tmrs(hba, hba->outstanding_tasks);
-			ufshcd_print_trs(hba, hba->outstanding_reqs,
-					pr_prdt);
-		}
 		ufshcd_schedule_eh_work(hba);
 		retval |= IRQ_HANDLED;
 	}