diff mbox series

[2/2] scsi: ufs: handle LINERESET with correct tm_cmd

Message ID 20210106193649.3348230-2-jaegeuk@kernel.org (mailing list archive)
State Superseded
Headers show
Series [1/2] scsi: ufs: fix livelock of ufshcd_clear_ua_wluns | expand

Commit Message

Jaegeuk Kim Jan. 6, 2021, 7:36 p.m. UTC
From: Jaegeuk Kim <jaegeuk@google.com>

This fixes a warning caused by incorrect reserved tag usage in __ufshcd_issue_tm_cmd().

WARNING: CPU: 7 PID: 7 at block/blk-core.c:630 blk_get_request+0x68/0x70
WARNING: CPU: 4 PID: 157 at block/blk-mq-tag.c:82 blk_mq_get_tag+0x438/0x46c

Also, in ufshcd_err_handler(), wait a bit for IO completion before aborting
outstanding commands, so that we can avoid sending tm_cmd and hitting a timeout like this:

__ufshcd_issue_tm_cmd: task management cmd 0x80 timed-out

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 drivers/scsi/ufs/ufshcd.c | 36 ++++++++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

Comments

kernel test robot Jan. 6, 2021, 9:09 p.m. UTC | #1
Hi Jaegeuk,

I love your patch! Perhaps something to improve:

[auto build test WARNING on scsi/for-next]
[also build test WARNING on mkp-scsi/for-next linus/master v5.11-rc2 next-20210104]
[cannot apply to linux/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Jaegeuk-Kim/scsi-ufs-fix-livelock-of-ufshcd_clear_ua_wluns/20210107-034119
base:   https://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git for-next
config: nds32-randconfig-r012-20210106 (attached as .config)
compiler: nds32le-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/1ae2226bbc3a8096dfceaab9c598f02d387915ba
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Jaegeuk-Kim/scsi-ufs-fix-livelock-of-ufshcd_clear_ua_wluns/20210107-034119
        git checkout 1ae2226bbc3a8096dfceaab9c598f02d387915ba
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=nds32 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from include/linux/device.h:15,
                    from include/linux/async.h:14,
                    from drivers/scsi/ufs/ufshcd.c:12:
   drivers/scsi/ufs/ufshcd.c: In function 'ufshcd_err_handler':
>> drivers/scsi/ufs/ufshcd.c:5922:23: warning: format '%x' expects argument of type 'unsigned int', but argument 4 has type 'long unsigned int' [-Wformat=]
    5922 |     dev_err(hba->dev, "%s: timeout, outstanding=%x\n",
         |                       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/dev_printk.h:19:22: note: in definition of macro 'dev_fmt'
      19 | #define dev_fmt(fmt) fmt
         |                      ^~~
   drivers/scsi/ufs/ufshcd.c:5922:5: note: in expansion of macro 'dev_err'
    5922 |     dev_err(hba->dev, "%s: timeout, outstanding=%x\n",
         |     ^~~~~~~
   drivers/scsi/ufs/ufshcd.c:5922:50: note: format string is defined here
    5922 |     dev_err(hba->dev, "%s: timeout, outstanding=%x\n",
         |                                                 ~^
         |                                                  |
         |                                                  unsigned int
         |                                                 %lx


vim +5922 drivers/scsi/ufs/ufshcd.c

  5818	
  5819	/**
  5820	 * ufshcd_err_handler - handle UFS errors that require s/w attention
  5821	 * @work: pointer to work structure
  5822	 */
  5823	static void ufshcd_err_handler(struct work_struct *work)
  5824	{
  5825		struct ufs_hba *hba;
  5826		unsigned long flags;
  5827		bool err_xfer = false;
  5828		bool err_tm = false;
  5829		int err = 0, pmc_err;
  5830		int tag;
  5831		bool needs_reset = false, needs_restore = false;
  5832	
  5833		hba = container_of(work, struct ufs_hba, eh_work);
  5834	
  5835		down(&hba->eh_sem);
  5836		spin_lock_irqsave(hba->host->host_lock, flags);
  5837		if (ufshcd_err_handling_should_stop(hba)) {
  5838			if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
  5839				hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
  5840			spin_unlock_irqrestore(hba->host->host_lock, flags);
  5841			up(&hba->eh_sem);
  5842			return;
  5843		}
  5844		ufshcd_set_eh_in_progress(hba);
  5845		spin_unlock_irqrestore(hba->host->host_lock, flags);
  5846		ufshcd_err_handling_prepare(hba);
  5847		spin_lock_irqsave(hba->host->host_lock, flags);
  5848		ufshcd_scsi_block_requests(hba);
  5849		hba->ufshcd_state = UFSHCD_STATE_RESET;
  5850	
  5851		/* Complete requests that have door-bell cleared by h/w */
  5852		ufshcd_complete_requests(hba);
  5853	
  5854		/*
  5855		 * A full reset and restore might have happened after preparation
  5856		 * is finished, double check whether we should stop.
  5857		 */
  5858		if (ufshcd_err_handling_should_stop(hba))
  5859			goto skip_err_handling;
  5860	
  5861		if (hba->dev_quirks & UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) {
  5862			bool ret;
  5863	
  5864			spin_unlock_irqrestore(hba->host->host_lock, flags);
  5865			/* release the lock as ufshcd_quirk_dl_nac_errors() may sleep */
  5866			ret = ufshcd_quirk_dl_nac_errors(hba);
  5867			spin_lock_irqsave(hba->host->host_lock, flags);
  5868			if (!ret && ufshcd_err_handling_should_stop(hba))
  5869				goto skip_err_handling;
  5870		}
  5871	
  5872		if ((hba->saved_err & (INT_FATAL_ERRORS | UFSHCD_UIC_HIBERN8_MASK)) ||
  5873		    (hba->saved_uic_err &&
  5874		     (hba->saved_uic_err != UFSHCD_UIC_PA_GENERIC_ERROR))) {
  5875			bool pr_prdt = !!(hba->saved_err & SYSTEM_BUS_FATAL_ERROR);
  5876	
  5877			spin_unlock_irqrestore(hba->host->host_lock, flags);
  5878			ufshcd_print_host_state(hba);
  5879			ufshcd_print_pwr_info(hba);
  5880			ufshcd_print_evt_hist(hba);
  5881			ufshcd_print_tmrs(hba, hba->outstanding_tasks);
  5882			ufshcd_print_trs(hba, hba->outstanding_reqs, pr_prdt);
  5883			spin_lock_irqsave(hba->host->host_lock, flags);
  5884		}
  5885	
  5886		/*
  5887		 * if host reset is required then skip clearing the pending
  5888		 * transfers forcefully because they will get cleared during
  5889		 * host reset and restore
  5890		 */
  5891		if (hba->force_reset || ufshcd_is_link_broken(hba) ||
  5892		    ufshcd_is_saved_err_fatal(hba) ||
  5893		    ((hba->saved_err & UIC_ERROR) &&
  5894		     (hba->saved_uic_err & (UFSHCD_UIC_DL_NAC_RECEIVED_ERROR |
  5895					    UFSHCD_UIC_DL_TCx_REPLAY_ERROR)))) {
  5896			needs_reset = true;
  5897			goto do_reset;
  5898		}
  5899	
  5900		/*
  5901		 * If LINERESET was caught, UFS might have been put to PWM mode,
  5902		 * check if power mode restore is needed.
  5903		 */
  5904		if (hba->saved_uic_err & UFSHCD_UIC_PA_GENERIC_ERROR) {
  5905			ktime_t start = ktime_get();
  5906	
  5907			hba->saved_uic_err &= ~UFSHCD_UIC_PA_GENERIC_ERROR;
  5908			if (!hba->saved_uic_err)
  5909				hba->saved_err &= ~UIC_ERROR;
  5910			spin_unlock_irqrestore(hba->host->host_lock, flags);
  5911			if (ufshcd_is_pwr_mode_restore_needed(hba))
  5912				needs_restore = true;
  5913			spin_lock_irqsave(hba->host->host_lock, flags);
  5914			/* Wait for IO completion to avoid aborting IOs */
  5915			while (hba->outstanding_reqs) {
  5916				ufshcd_complete_requests(hba);
  5917				spin_unlock_irqrestore(hba->host->host_lock, flags);
  5918				schedule();
  5919				spin_lock_irqsave(hba->host->host_lock, flags);
  5920				if (ktime_to_ms(ktime_sub(ktime_get(), start)) >
  5921							LINERESET_IO_TIMEOUT_MS) {
> 5922					dev_err(hba->dev, "%s: timeout, outstanding=%x\n",
  5923						__func__, hba->outstanding_reqs);
  5924					break;
  5925				}
  5926			}
  5927	
  5928			if (!hba->saved_err && !needs_restore)
  5929				goto skip_err_handling;
  5930		}
  5931	
  5932		hba->silence_err_logs = true;
  5933		/* release lock as clear command might sleep */
  5934		spin_unlock_irqrestore(hba->host->host_lock, flags);
  5935		/* Clear pending transfer requests */
  5936		for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
  5937			if (ufshcd_try_to_abort_task(hba, tag)) {
  5938				err_xfer = true;
  5939				goto lock_skip_pending_xfer_clear;
  5940			}
  5941		}
  5942	
  5943		/* Clear pending task management requests */
  5944		for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) {
  5945			if (ufshcd_clear_tm_cmd(hba, tag)) {
  5946				err_tm = true;
  5947				goto lock_skip_pending_xfer_clear;
  5948			}
  5949		}
  5950	
  5951	lock_skip_pending_xfer_clear:
  5952		spin_lock_irqsave(hba->host->host_lock, flags);
  5953	
  5954		/* Complete the requests that are cleared by s/w */
  5955		ufshcd_complete_requests(hba);
  5956		hba->silence_err_logs = false;
  5957	
  5958		if (err_xfer || err_tm) {
  5959			needs_reset = true;
  5960			goto do_reset;
  5961		}
  5962	
  5963		/*
  5964		 * After all reqs and tasks are cleared from doorbell,
  5965		 * now it is safe to retore power mode.
  5966		 */
  5967		if (needs_restore) {
  5968			spin_unlock_irqrestore(hba->host->host_lock, flags);
  5969			/*
  5970			 * Hold the scaling lock just in case dev cmds
  5971			 * are sent via bsg and/or sysfs.
  5972			 */
  5973			down_write(&hba->clk_scaling_lock);
  5974			hba->force_pmc = true;
  5975			pmc_err = ufshcd_config_pwr_mode(hba, &(hba->pwr_info));
  5976			if (pmc_err) {
  5977				needs_reset = true;
  5978				dev_err(hba->dev, "%s: Failed to restore power mode, err = %d\n",
  5979						__func__, pmc_err);
  5980			}
  5981			hba->force_pmc = false;
  5982			ufshcd_print_pwr_info(hba);
  5983			up_write(&hba->clk_scaling_lock);
  5984			spin_lock_irqsave(hba->host->host_lock, flags);
  5985		}
  5986	
  5987	do_reset:
  5988		/* Fatal errors need reset */
  5989		if (needs_reset) {
  5990			unsigned long max_doorbells = (1UL << hba->nutrs) - 1;
  5991	
  5992			/*
  5993			 * ufshcd_reset_and_restore() does the link reinitialization
  5994			 * which will need atleast one empty doorbell slot to send the
  5995			 * device management commands (NOP and query commands).
  5996			 * If there is no slot empty at this moment then free up last
  5997			 * slot forcefully.
  5998			 */
  5999			if (hba->outstanding_reqs == max_doorbells)
  6000				__ufshcd_transfer_req_compl(hba,
  6001							    (1UL << (hba->nutrs - 1)));
  6002	
  6003			hba->force_reset = false;
  6004			spin_unlock_irqrestore(hba->host->host_lock, flags);
  6005			err = ufshcd_reset_and_restore(hba);
  6006			if (err)
  6007				dev_err(hba->dev, "%s: reset and restore failed with err %d\n",
  6008						__func__, err);
  6009			else
  6010				ufshcd_recover_pm_error(hba);
  6011			spin_lock_irqsave(hba->host->host_lock, flags);
  6012		}
  6013	
  6014	skip_err_handling:
  6015		if (!needs_reset) {
  6016			if (hba->ufshcd_state == UFSHCD_STATE_RESET)
  6017				hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
  6018			if (hba->saved_err || hba->saved_uic_err)
  6019				dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x saved_uic_err 0x%x",
  6020				    __func__, hba->saved_err, hba->saved_uic_err);
  6021		}
  6022		ufshcd_clear_eh_in_progress(hba);
  6023		spin_unlock_irqrestore(hba->host->host_lock, flags);
  6024		ufshcd_scsi_unblock_requests(hba);
  6025		ufshcd_err_handling_unprepare(hba);
  6026		up(&hba->eh_sem);
  6027	
  6028		if (!err && needs_reset)
  6029			ufshcd_clear_ua_wluns(hba);
  6030	}
  6031	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot Jan. 6, 2021, 9:25 p.m. UTC | #2
Hi Jaegeuk,

I love your patch! Perhaps something to improve:

[auto build test WARNING on scsi/for-next]
[also build test WARNING on mkp-scsi/for-next linus/master v5.11-rc2 next-20210104]
[cannot apply to linux/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Jaegeuk-Kim/scsi-ufs-fix-livelock-of-ufshcd_clear_ua_wluns/20210107-034119
base:   https://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git for-next
config: x86_64-randconfig-r016-20210106 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 5c951623bc8965fa1e89660f2f5f4a2944e4981a)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install x86_64 cross compiling tool for clang build
        # apt-get install binutils-x86-64-linux-gnu
        # https://github.com/0day-ci/linux/commit/1ae2226bbc3a8096dfceaab9c598f02d387915ba
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Jaegeuk-Kim/scsi-ufs-fix-livelock-of-ufshcd_clear_ua_wluns/20210107-034119
        git checkout 1ae2226bbc3a8096dfceaab9c598f02d387915ba
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/scsi/ufs/ufshcd.c:5923:16: warning: format specifies type 'unsigned int' but the argument has type 'unsigned long' [-Wformat]
                                           __func__, hba->outstanding_reqs);
                                                     ^~~~~~~~~~~~~~~~~~~~~
   include/linux/dev_printk.h:112:32: note: expanded from macro 'dev_err'
           _dev_err(dev, dev_fmt(fmt), ##__VA_ARGS__)
                                 ~~~     ^~~~~~~~~~~
   1 warning generated.


vim +5923 drivers/scsi/ufs/ufshcd.c

  5818	
  5819	/**
  5820	 * ufshcd_err_handler - handle UFS errors that require s/w attention
  5821	 * @work: pointer to work structure
  5822	 */
  5823	static void ufshcd_err_handler(struct work_struct *work)
  5824	{
  5825		struct ufs_hba *hba;
  5826		unsigned long flags;
  5827		bool err_xfer = false;
  5828		bool err_tm = false;
  5829		int err = 0, pmc_err;
  5830		int tag;
  5831		bool needs_reset = false, needs_restore = false;
  5832	
  5833		hba = container_of(work, struct ufs_hba, eh_work);
  5834	
  5835		down(&hba->eh_sem);
  5836		spin_lock_irqsave(hba->host->host_lock, flags);
  5837		if (ufshcd_err_handling_should_stop(hba)) {
  5838			if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
  5839				hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
  5840			spin_unlock_irqrestore(hba->host->host_lock, flags);
  5841			up(&hba->eh_sem);
  5842			return;
  5843		}
  5844		ufshcd_set_eh_in_progress(hba);
  5845		spin_unlock_irqrestore(hba->host->host_lock, flags);
  5846		ufshcd_err_handling_prepare(hba);
  5847		spin_lock_irqsave(hba->host->host_lock, flags);
  5848		ufshcd_scsi_block_requests(hba);
  5849		hba->ufshcd_state = UFSHCD_STATE_RESET;
  5850	
  5851		/* Complete requests that have door-bell cleared by h/w */
  5852		ufshcd_complete_requests(hba);
  5853	
  5854		/*
  5855		 * A full reset and restore might have happened after preparation
  5856		 * is finished, double check whether we should stop.
  5857		 */
  5858		if (ufshcd_err_handling_should_stop(hba))
  5859			goto skip_err_handling;
  5860	
  5861		if (hba->dev_quirks & UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) {
  5862			bool ret;
  5863	
  5864			spin_unlock_irqrestore(hba->host->host_lock, flags);
  5865			/* release the lock as ufshcd_quirk_dl_nac_errors() may sleep */
  5866			ret = ufshcd_quirk_dl_nac_errors(hba);
  5867			spin_lock_irqsave(hba->host->host_lock, flags);
  5868			if (!ret && ufshcd_err_handling_should_stop(hba))
  5869				goto skip_err_handling;
  5870		}
  5871	
  5872		if ((hba->saved_err & (INT_FATAL_ERRORS | UFSHCD_UIC_HIBERN8_MASK)) ||
  5873		    (hba->saved_uic_err &&
  5874		     (hba->saved_uic_err != UFSHCD_UIC_PA_GENERIC_ERROR))) {
  5875			bool pr_prdt = !!(hba->saved_err & SYSTEM_BUS_FATAL_ERROR);
  5876	
  5877			spin_unlock_irqrestore(hba->host->host_lock, flags);
  5878			ufshcd_print_host_state(hba);
  5879			ufshcd_print_pwr_info(hba);
  5880			ufshcd_print_evt_hist(hba);
  5881			ufshcd_print_tmrs(hba, hba->outstanding_tasks);
  5882			ufshcd_print_trs(hba, hba->outstanding_reqs, pr_prdt);
  5883			spin_lock_irqsave(hba->host->host_lock, flags);
  5884		}
  5885	
  5886		/*
  5887		 * if host reset is required then skip clearing the pending
  5888		 * transfers forcefully because they will get cleared during
  5889		 * host reset and restore
  5890		 */
  5891		if (hba->force_reset || ufshcd_is_link_broken(hba) ||
  5892		    ufshcd_is_saved_err_fatal(hba) ||
  5893		    ((hba->saved_err & UIC_ERROR) &&
  5894		     (hba->saved_uic_err & (UFSHCD_UIC_DL_NAC_RECEIVED_ERROR |
  5895					    UFSHCD_UIC_DL_TCx_REPLAY_ERROR)))) {
  5896			needs_reset = true;
  5897			goto do_reset;
  5898		}
  5899	
  5900		/*
  5901		 * If LINERESET was caught, UFS might have been put to PWM mode,
  5902		 * check if power mode restore is needed.
  5903		 */
  5904		if (hba->saved_uic_err & UFSHCD_UIC_PA_GENERIC_ERROR) {
  5905			ktime_t start = ktime_get();
  5906	
  5907			hba->saved_uic_err &= ~UFSHCD_UIC_PA_GENERIC_ERROR;
  5908			if (!hba->saved_uic_err)
  5909				hba->saved_err &= ~UIC_ERROR;
  5910			spin_unlock_irqrestore(hba->host->host_lock, flags);
  5911			if (ufshcd_is_pwr_mode_restore_needed(hba))
  5912				needs_restore = true;
  5913			spin_lock_irqsave(hba->host->host_lock, flags);
  5914			/* Wait for IO completion to avoid aborting IOs */
  5915			while (hba->outstanding_reqs) {
  5916				ufshcd_complete_requests(hba);
  5917				spin_unlock_irqrestore(hba->host->host_lock, flags);
  5918				schedule();
  5919				spin_lock_irqsave(hba->host->host_lock, flags);
  5920				if (ktime_to_ms(ktime_sub(ktime_get(), start)) >
  5921							LINERESET_IO_TIMEOUT_MS) {
  5922					dev_err(hba->dev, "%s: timeout, outstanding=%x\n",
> 5923						__func__, hba->outstanding_reqs);
  5924					break;
  5925				}
  5926			}
  5927	
  5928			if (!hba->saved_err && !needs_restore)
  5929				goto skip_err_handling;
  5930		}
  5931	
  5932		hba->silence_err_logs = true;
  5933		/* release lock as clear command might sleep */
  5934		spin_unlock_irqrestore(hba->host->host_lock, flags);
  5935		/* Clear pending transfer requests */
  5936		for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
  5937			if (ufshcd_try_to_abort_task(hba, tag)) {
  5938				err_xfer = true;
  5939				goto lock_skip_pending_xfer_clear;
  5940			}
  5941		}
  5942	
  5943		/* Clear pending task management requests */
  5944		for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) {
  5945			if (ufshcd_clear_tm_cmd(hba, tag)) {
  5946				err_tm = true;
  5947				goto lock_skip_pending_xfer_clear;
  5948			}
  5949		}
  5950	
  5951	lock_skip_pending_xfer_clear:
  5952		spin_lock_irqsave(hba->host->host_lock, flags);
  5953	
  5954		/* Complete the requests that are cleared by s/w */
  5955		ufshcd_complete_requests(hba);
  5956		hba->silence_err_logs = false;
  5957	
  5958		if (err_xfer || err_tm) {
  5959			needs_reset = true;
  5960			goto do_reset;
  5961		}
  5962	
  5963		/*
  5964		 * After all reqs and tasks are cleared from doorbell,
  5965		 * now it is safe to retore power mode.
  5966		 */
  5967		if (needs_restore) {
  5968			spin_unlock_irqrestore(hba->host->host_lock, flags);
  5969			/*
  5970			 * Hold the scaling lock just in case dev cmds
  5971			 * are sent via bsg and/or sysfs.
  5972			 */
  5973			down_write(&hba->clk_scaling_lock);
  5974			hba->force_pmc = true;
  5975			pmc_err = ufshcd_config_pwr_mode(hba, &(hba->pwr_info));
  5976			if (pmc_err) {
  5977				needs_reset = true;
  5978				dev_err(hba->dev, "%s: Failed to restore power mode, err = %d\n",
  5979						__func__, pmc_err);
  5980			}
  5981			hba->force_pmc = false;
  5982			ufshcd_print_pwr_info(hba);
  5983			up_write(&hba->clk_scaling_lock);
  5984			spin_lock_irqsave(hba->host->host_lock, flags);
  5985		}
  5986	
  5987	do_reset:
  5988		/* Fatal errors need reset */
  5989		if (needs_reset) {
  5990			unsigned long max_doorbells = (1UL << hba->nutrs) - 1;
  5991	
  5992			/*
  5993			 * ufshcd_reset_and_restore() does the link reinitialization
  5994			 * which will need atleast one empty doorbell slot to send the
  5995			 * device management commands (NOP and query commands).
  5996			 * If there is no slot empty at this moment then free up last
  5997			 * slot forcefully.
  5998			 */
  5999			if (hba->outstanding_reqs == max_doorbells)
  6000				__ufshcd_transfer_req_compl(hba,
  6001							    (1UL << (hba->nutrs - 1)));
  6002	
  6003			hba->force_reset = false;
  6004			spin_unlock_irqrestore(hba->host->host_lock, flags);
  6005			err = ufshcd_reset_and_restore(hba);
  6006			if (err)
  6007				dev_err(hba->dev, "%s: reset and restore failed with err %d\n",
  6008						__func__, err);
  6009			else
  6010				ufshcd_recover_pm_error(hba);
  6011			spin_lock_irqsave(hba->host->host_lock, flags);
  6012		}
  6013	
  6014	skip_err_handling:
  6015		if (!needs_reset) {
  6016			if (hba->ufshcd_state == UFSHCD_STATE_RESET)
  6017				hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
  6018			if (hba->saved_err || hba->saved_uic_err)
  6019				dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x saved_uic_err 0x%x",
  6020				    __func__, hba->saved_err, hba->saved_uic_err);
  6021		}
  6022		ufshcd_clear_eh_in_progress(hba);
  6023		spin_unlock_irqrestore(hba->host->host_lock, flags);
  6024		ufshcd_scsi_unblock_requests(hba);
  6025		ufshcd_err_handling_unprepare(hba);
  6026		up(&hba->eh_sem);
  6027	
  6028		if (!err && needs_reset)
  6029			ufshcd_clear_ua_wluns(hba);
  6030	}
  6031	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 1678cec08b51..377da8e98d9b 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -44,6 +44,9 @@ 
 /* Query request timeout */
 #define QUERY_REQ_TIMEOUT 1500 /* 1.5 seconds */
 
+/* LINERESET TIME OUT */
+#define LINERESET_IO_TIMEOUT_MS			(30000) /* 30 sec */
+
 /* Task management command timeout */
 #define TM_CMD_TIMEOUT	100 /* msecs */
 
@@ -5899,6 +5902,8 @@  static void ufshcd_err_handler(struct work_struct *work)
 	 * check if power mode restore is needed.
 	 */
 	if (hba->saved_uic_err & UFSHCD_UIC_PA_GENERIC_ERROR) {
+		ktime_t start = ktime_get();
+
 		hba->saved_uic_err &= ~UFSHCD_UIC_PA_GENERIC_ERROR;
 		if (!hba->saved_uic_err)
 			hba->saved_err &= ~UIC_ERROR;
@@ -5906,6 +5911,20 @@  static void ufshcd_err_handler(struct work_struct *work)
 		if (ufshcd_is_pwr_mode_restore_needed(hba))
 			needs_restore = true;
 		spin_lock_irqsave(hba->host->host_lock, flags);
+		/* Wait for IO completion to avoid aborting IOs */
+		while (hba->outstanding_reqs) {
+			ufshcd_complete_requests(hba);
+			spin_unlock_irqrestore(hba->host->host_lock, flags);
+			schedule();
+			spin_lock_irqsave(hba->host->host_lock, flags);
+			if (ktime_to_ms(ktime_sub(ktime_get(), start)) >
+						LINERESET_IO_TIMEOUT_MS) {
+				dev_err(hba->dev, "%s: timeout, outstanding=%x\n",
+					__func__, hba->outstanding_reqs);
+				break;
+			}
+		}
+
 		if (!hba->saved_err && !needs_restore)
 			goto skip_err_handling;
 	}
@@ -6302,9 +6321,13 @@  static irqreturn_t ufshcd_intr(int irq, void *__hba)
 		intr_status = ufshcd_readl(hba, REG_INTERRUPT_STATUS);
 	}
 
-	if (enabled_intr_status && retval == IRQ_NONE) {
-		dev_err(hba->dev, "%s: Unhandled interrupt 0x%08x\n",
-					__func__, intr_status);
+	if (enabled_intr_status && retval == IRQ_NONE &&
+				!ufshcd_eh_in_progress(hba)) {
+		dev_err(hba->dev, "%s: Unhandled interrupt 0x%08x (0x%08x, 0x%08x)\n",
+					__func__,
+					intr_status,
+					hba->ufs_stats.last_intr_status,
+					enabled_intr_status);
 		ufshcd_dump_regs(hba, 0, UFSHCI_REG_SPACE_SIZE, "host_regs: ");
 	}
 
@@ -6348,7 +6371,11 @@  static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba,
 	 * Even though we use wait_event() which sleeps indefinitely,
 	 * the maximum wait time is bounded by %TM_CMD_TIMEOUT.
 	 */
-	req = blk_get_request(q, REQ_OP_DRV_OUT, BLK_MQ_REQ_RESERVED);
+	req = blk_get_request(q, REQ_OP_DRV_OUT, BLK_MQ_REQ_RESERVED |
+						BLK_MQ_REQ_NOWAIT);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
 	req->end_io_data = &wait;
 	free_slot = req->tag;
 	WARN_ON_ONCE(free_slot < 0 || free_slot >= hba->nutmrs);
@@ -9355,6 +9382,7 @@  int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 
 	hba->tmf_tag_set = (struct blk_mq_tag_set) {
 		.nr_hw_queues	= 1,
+		.reserved_tags	= 1,
 		.queue_depth	= hba->nutmrs,
 		.ops		= &ufshcd_tmf_ops,
 		.flags		= BLK_MQ_F_NO_SCHED,