Message ID | 1595912460-8860-8-git-send-email-cang@codeaurora.org (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Fix up and simplify error recovery mechanism | expand |
On 7/27/2020 10:00 PM, Can Guo wrote: > Sometime dumps in IRQ handler are heavy enough to cause system stability > issues, move them to error handler. > > Signed-off-by: Can Guo <cang@codeaurora.org> > --- > drivers/scsi/ufs/ufshcd.c | 31 +++++++++++++++---------------- > 1 file changed, 15 insertions(+), 16 deletions(-) > > diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c > index c480823..b2bafa3 100644 > --- a/drivers/scsi/ufs/ufshcd.c > +++ b/drivers/scsi/ufs/ufshcd.c > @@ -5682,6 +5682,21 @@ static void ufshcd_err_handler(struct work_struct *work) > UFSHCD_UIC_DL_TCx_REPLAY_ERROR)))) > needs_reset = true; > > + if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR | > + UFSHCD_UIC_HIBERN8_MASK)) { > + bool pr_prdt = !!(hba->saved_err & SYSTEM_BUS_FATAL_ERROR); > + > + dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x\n", > + __func__, hba->saved_err, hba->saved_uic_err); > + spin_unlock_irqrestore(hba->host->host_lock, flags); > + ufshcd_print_host_state(hba); > + ufshcd_print_pwr_info(hba); > + ufshcd_print_host_regs(hba); > + ufshcd_print_tmrs(hba, hba->outstanding_tasks); > + ufshcd_print_trs(hba, hba->outstanding_reqs, pr_prdt); > + spin_lock_irqsave(hba->host->host_lock, flags); > + } > + > /* > * if host reset is required then skip clearing the pending > * transfers forcefully because they will get cleared during > @@ -5900,22 +5915,6 @@ static irqreturn_t ufshcd_check_errors(struct ufs_hba *hba) > > /* block commands from scsi mid-layer */ > ufshcd_scsi_block_requests(hba); > - > - /* dump controller state before resetting */ > - if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR)) { > - bool pr_prdt = !!(hba->saved_err & > - SYSTEM_BUS_FATAL_ERROR); > - > - dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x\n", > - __func__, hba->saved_err, > - hba->saved_uic_err); > - > - ufshcd_print_host_regs(hba); > - ufshcd_print_pwr_info(hba); How about keep the above prints and move the tmrs and trs to eh? Sometimes in system instability, the eh may not get a chance to run even. Still the above prints would provide some clues. > - ufshcd_print_tmrs(hba, hba->outstanding_tasks); > - ufshcd_print_trs(hba, hba->outstanding_reqs, > - pr_prdt); > - } > ufshcd_schedule_eh_work(hba); > retval |= IRQ_HANDLED; > } >
Hi Asutosh, On 2020-07-29 02:06, Asutosh Das (asd) wrote: > On 7/27/2020 10:00 PM, Can Guo wrote: >> Sometime dumps in IRQ handler are heavy enough to cause system >> stability >> issues, move them to error handler. >> >> Signed-off-by: Can Guo <cang@codeaurora.org> >> --- >> drivers/scsi/ufs/ufshcd.c | 31 +++++++++++++++---------------- >> 1 file changed, 15 insertions(+), 16 deletions(-) >> >> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c >> index c480823..b2bafa3 100644 >> --- a/drivers/scsi/ufs/ufshcd.c >> +++ b/drivers/scsi/ufs/ufshcd.c >> @@ -5682,6 +5682,21 @@ static void ufshcd_err_handler(struct >> work_struct *work) >> UFSHCD_UIC_DL_TCx_REPLAY_ERROR)))) >> needs_reset = true; >> + if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR | >> + UFSHCD_UIC_HIBERN8_MASK)) { >> + bool pr_prdt = !!(hba->saved_err & SYSTEM_BUS_FATAL_ERROR); >> + >> + dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x\n", >> + __func__, hba->saved_err, hba->saved_uic_err); >> + spin_unlock_irqrestore(hba->host->host_lock, flags); >> + ufshcd_print_host_state(hba); >> + ufshcd_print_pwr_info(hba); >> + ufshcd_print_host_regs(hba); >> + ufshcd_print_tmrs(hba, hba->outstanding_tasks); >> + ufshcd_print_trs(hba, hba->outstanding_reqs, pr_prdt); >> + spin_lock_irqsave(hba->host->host_lock, flags); >> + } >> + >> /* >> * if host reset is required then skip clearing the pending >> * transfers forcefully because they will get cleared during >> @@ -5900,22 +5915,6 @@ static irqreturn_t ufshcd_check_errors(struct >> ufs_hba *hba) >> /* block commands from scsi mid-layer */ >> ufshcd_scsi_block_requests(hba); >> - >> - /* dump controller state before resetting */ >> - if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR)) { >> - bool pr_prdt = !!(hba->saved_err & >> - SYSTEM_BUS_FATAL_ERROR); >> - >> - dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x\n", >> - __func__, hba->saved_err, >> - hba->saved_uic_err); >> - >> - ufshcd_print_host_regs(hba); >> - ufshcd_print_pwr_info(hba); > How about keep the above prints and move the tmrs and trs to eh? > Sometimes in system instability, the eh may not get a chance to run > even. Still the above prints would provide some clues. Here is the IRQ handler, ufshcd_print_host_regs() is sometime heavy enough to cause stability issues during my fault injection test, since it prints host regs, reg's history, crypto debug infos plus prints from vops_dump. How about just printing host regs and reg history here? Most time, these infos are enough. Thanks, Can Guo. >> - ufshcd_print_tmrs(hba, hba->outstanding_tasks); >> - ufshcd_print_trs(hba, hba->outstanding_reqs, >> - pr_prdt); >> - } >> ufshcd_schedule_eh_work(hba); >> retval |= IRQ_HANDLED; >> } >>
On 7/29/2020 6:02 AM, Can Guo wrote: > Hi Asutosh, > > On 2020-07-29 02:06, Asutosh Das (asd) wrote: >> On 7/27/2020 10:00 PM, Can Guo wrote: >>> Sometime dumps in IRQ handler are heavy enough to cause system stability >>> issues, move them to error handler. >>> >>> Signed-off-by: Can Guo <cang@codeaurora.org> >>> --- >>> drivers/scsi/ufs/ufshcd.c | 31 +++++++++++++++---------------- >>> 1 file changed, 15 insertions(+), 16 deletions(-) >>> >>> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c >>> index c480823..b2bafa3 100644 >>> --- a/drivers/scsi/ufs/ufshcd.c >>> +++ b/drivers/scsi/ufs/ufshcd.c >>> @@ -5682,6 +5682,21 @@ static void ufshcd_err_handler(struct >>> work_struct *work) >>> UFSHCD_UIC_DL_TCx_REPLAY_ERROR)))) >>> needs_reset = true; >>> + if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR | >>> + UFSHCD_UIC_HIBERN8_MASK)) { >>> + bool pr_prdt = !!(hba->saved_err & SYSTEM_BUS_FATAL_ERROR); >>> + >>> + dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x\n", >>> + __func__, hba->saved_err, hba->saved_uic_err); >>> + spin_unlock_irqrestore(hba->host->host_lock, flags); >>> + ufshcd_print_host_state(hba); >>> + ufshcd_print_pwr_info(hba); >>> + ufshcd_print_host_regs(hba); >>> + ufshcd_print_tmrs(hba, hba->outstanding_tasks); >>> + ufshcd_print_trs(hba, hba->outstanding_reqs, pr_prdt); >>> + spin_lock_irqsave(hba->host->host_lock, flags); >>> + } >>> + >>> /* >>> * if host reset is required then skip clearing the pending >>> * transfers forcefully because they will get cleared during >>> @@ -5900,22 +5915,6 @@ static irqreturn_t ufshcd_check_errors(struct >>> ufs_hba *hba) >>> /* block commands from scsi mid-layer */ >>> ufshcd_scsi_block_requests(hba); >>> - >>> - /* dump controller state before resetting */ >>> - if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR)) { >>> - bool pr_prdt = !!(hba->saved_err & >>> - SYSTEM_BUS_FATAL_ERROR); >>> - >>> - dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err >>> 0x%x\n", >>> - __func__, hba->saved_err, >>> - hba->saved_uic_err); >>> - >>> - ufshcd_print_host_regs(hba); >>> - ufshcd_print_pwr_info(hba); >> How about keep the above prints and move the tmrs and trs to eh? >> Sometimes in system instability, the eh may not get a chance to run >> even. Still the above prints would provide some clues. > > Here is the IRQ handler, ufshcd_print_host_regs() is sometime heavy > enough to cause stability issues during my fault injection test, since > it prints host regs, reg's history, crypto debug infos plus prints > from vops_dump. > > How about just printing host regs and reg history here? Most time, these > infos are enough. > That'd work too. > Thanks, > > Can Guo. > >>> - ufshcd_print_tmrs(hba, hba->outstanding_tasks); >>> - ufshcd_print_trs(hba, hba->outstanding_reqs, >>> - pr_prdt); >>> - } >>> ufshcd_schedule_eh_work(hba); >>> retval |= IRQ_HANDLED; >>> } >>>
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index c480823..b2bafa3 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -5682,6 +5682,21 @@ static void ufshcd_err_handler(struct work_struct *work) UFSHCD_UIC_DL_TCx_REPLAY_ERROR)))) needs_reset = true; + if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR | + UFSHCD_UIC_HIBERN8_MASK)) { + bool pr_prdt = !!(hba->saved_err & SYSTEM_BUS_FATAL_ERROR); + + dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x\n", + __func__, hba->saved_err, hba->saved_uic_err); + spin_unlock_irqrestore(hba->host->host_lock, flags); + ufshcd_print_host_state(hba); + ufshcd_print_pwr_info(hba); + ufshcd_print_host_regs(hba); + ufshcd_print_tmrs(hba, hba->outstanding_tasks); + ufshcd_print_trs(hba, hba->outstanding_reqs, pr_prdt); + spin_lock_irqsave(hba->host->host_lock, flags); + } + /* * if host reset is required then skip clearing the pending * transfers forcefully because they will get cleared during @@ -5900,22 +5915,6 @@ static irqreturn_t ufshcd_check_errors(struct ufs_hba *hba) /* block commands from scsi mid-layer */ ufshcd_scsi_block_requests(hba); - - /* dump controller state before resetting */ - if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR)) { - bool pr_prdt = !!(hba->saved_err & - SYSTEM_BUS_FATAL_ERROR); - - dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x\n", - __func__, hba->saved_err, - hba->saved_uic_err); - - ufshcd_print_host_regs(hba); - ufshcd_print_pwr_info(hba); - ufshcd_print_tmrs(hba, hba->outstanding_tasks); - ufshcd_print_trs(hba, hba->outstanding_reqs, - pr_prdt); - } ufshcd_schedule_eh_work(hba); retval |= IRQ_HANDLED; }
Sometime dumps in IRQ handler are heavy enough to cause system stability issues, move them to error handler. Signed-off-by: Can Guo <cang@codeaurora.org> --- drivers/scsi/ufs/ufshcd.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-)