Message ID | 20210106193649.3348230-2-jaegeuk@kernel.org (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | [1/2] scsi: ufs: fix livelock of ufshcd_clear_ua_wluns | expand |
Hi Jaegeuk, I love your patch! Perhaps something to improve: [auto build test WARNING on scsi/for-next] [also build test WARNING on mkp-scsi/for-next linus/master v5.11-rc2 next-20210104] [cannot apply to linux/master] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Jaegeuk-Kim/scsi-ufs-fix-livelock-of-ufshcd_clear_ua_wluns/20210107-034119 base: https://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git for-next config: nds32-randconfig-r012-20210106 (attached as .config) compiler: nds32le-linux-gcc (GCC) 9.3.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/0day-ci/linux/commit/1ae2226bbc3a8096dfceaab9c598f02d387915ba git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Jaegeuk-Kim/scsi-ufs-fix-livelock-of-ufshcd_clear_ua_wluns/20210107-034119 git checkout 1ae2226bbc3a8096dfceaab9c598f02d387915ba # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=nds32 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): In file included from include/linux/device.h:15, from include/linux/async.h:14, from drivers/scsi/ufs/ufshcd.c:12: drivers/scsi/ufs/ufshcd.c: In function 'ufshcd_err_handler': >> drivers/scsi/ufs/ufshcd.c:5922:23: warning: format '%x' expects argument of type 'unsigned int', but argument 4 has type 'long unsigned int' [-Wformat=] 5922 | dev_err(hba->dev, "%s: timeout, outstanding=%x\n", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/dev_printk.h:19:22: note: in definition of macro 'dev_fmt' 19 | #define dev_fmt(fmt) fmt | ^~~ 
drivers/scsi/ufs/ufshcd.c:5922:5: note: in expansion of macro 'dev_err' 5922 | dev_err(hba->dev, "%s: timeout, outstanding=%x\n", | ^~~~~~~ drivers/scsi/ufs/ufshcd.c:5922:50: note: format string is defined here 5922 | dev_err(hba->dev, "%s: timeout, outstanding=%x\n", | ~^ | | | unsigned int | %lx vim +5922 drivers/scsi/ufs/ufshcd.c 5818 5819 /** 5820 * ufshcd_err_handler - handle UFS errors that require s/w attention 5821 * @work: pointer to work structure 5822 */ 5823 static void ufshcd_err_handler(struct work_struct *work) 5824 { 5825 struct ufs_hba *hba; 5826 unsigned long flags; 5827 bool err_xfer = false; 5828 bool err_tm = false; 5829 int err = 0, pmc_err; 5830 int tag; 5831 bool needs_reset = false, needs_restore = false; 5832 5833 hba = container_of(work, struct ufs_hba, eh_work); 5834 5835 down(&hba->eh_sem); 5836 spin_lock_irqsave(hba->host->host_lock, flags); 5837 if (ufshcd_err_handling_should_stop(hba)) { 5838 if (hba->ufshcd_state != UFSHCD_STATE_ERROR) 5839 hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL; 5840 spin_unlock_irqrestore(hba->host->host_lock, flags); 5841 up(&hba->eh_sem); 5842 return; 5843 } 5844 ufshcd_set_eh_in_progress(hba); 5845 spin_unlock_irqrestore(hba->host->host_lock, flags); 5846 ufshcd_err_handling_prepare(hba); 5847 spin_lock_irqsave(hba->host->host_lock, flags); 5848 ufshcd_scsi_block_requests(hba); 5849 hba->ufshcd_state = UFSHCD_STATE_RESET; 5850 5851 /* Complete requests that have door-bell cleared by h/w */ 5852 ufshcd_complete_requests(hba); 5853 5854 /* 5855 * A full reset and restore might have happened after preparation 5856 * is finished, double check whether we should stop. 
5857 */ 5858 if (ufshcd_err_handling_should_stop(hba)) 5859 goto skip_err_handling; 5860 5861 if (hba->dev_quirks & UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) { 5862 bool ret; 5863 5864 spin_unlock_irqrestore(hba->host->host_lock, flags); 5865 /* release the lock as ufshcd_quirk_dl_nac_errors() may sleep */ 5866 ret = ufshcd_quirk_dl_nac_errors(hba); 5867 spin_lock_irqsave(hba->host->host_lock, flags); 5868 if (!ret && ufshcd_err_handling_should_stop(hba)) 5869 goto skip_err_handling; 5870 } 5871 5872 if ((hba->saved_err & (INT_FATAL_ERRORS | UFSHCD_UIC_HIBERN8_MASK)) || 5873 (hba->saved_uic_err && 5874 (hba->saved_uic_err != UFSHCD_UIC_PA_GENERIC_ERROR))) { 5875 bool pr_prdt = !!(hba->saved_err & SYSTEM_BUS_FATAL_ERROR); 5876 5877 spin_unlock_irqrestore(hba->host->host_lock, flags); 5878 ufshcd_print_host_state(hba); 5879 ufshcd_print_pwr_info(hba); 5880 ufshcd_print_evt_hist(hba); 5881 ufshcd_print_tmrs(hba, hba->outstanding_tasks); 5882 ufshcd_print_trs(hba, hba->outstanding_reqs, pr_prdt); 5883 spin_lock_irqsave(hba->host->host_lock, flags); 5884 } 5885 5886 /* 5887 * if host reset is required then skip clearing the pending 5888 * transfers forcefully because they will get cleared during 5889 * host reset and restore 5890 */ 5891 if (hba->force_reset || ufshcd_is_link_broken(hba) || 5892 ufshcd_is_saved_err_fatal(hba) || 5893 ((hba->saved_err & UIC_ERROR) && 5894 (hba->saved_uic_err & (UFSHCD_UIC_DL_NAC_RECEIVED_ERROR | 5895 UFSHCD_UIC_DL_TCx_REPLAY_ERROR)))) { 5896 needs_reset = true; 5897 goto do_reset; 5898 } 5899 5900 /* 5901 * If LINERESET was caught, UFS might have been put to PWM mode, 5902 * check if power mode restore is needed. 
5903 */ 5904 if (hba->saved_uic_err & UFSHCD_UIC_PA_GENERIC_ERROR) { 5905 ktime_t start = ktime_get(); 5906 5907 hba->saved_uic_err &= ~UFSHCD_UIC_PA_GENERIC_ERROR; 5908 if (!hba->saved_uic_err) 5909 hba->saved_err &= ~UIC_ERROR; 5910 spin_unlock_irqrestore(hba->host->host_lock, flags); 5911 if (ufshcd_is_pwr_mode_restore_needed(hba)) 5912 needs_restore = true; 5913 spin_lock_irqsave(hba->host->host_lock, flags); 5914 /* Wait for IO completion to avoid aborting IOs */ 5915 while (hba->outstanding_reqs) { 5916 ufshcd_complete_requests(hba); 5917 spin_unlock_irqrestore(hba->host->host_lock, flags); 5918 schedule(); 5919 spin_lock_irqsave(hba->host->host_lock, flags); 5920 if (ktime_to_ms(ktime_sub(ktime_get(), start)) > 5921 LINERESET_IO_TIMEOUT_MS) { > 5922 dev_err(hba->dev, "%s: timeout, outstanding=%x\n", 5923 __func__, hba->outstanding_reqs); 5924 break; 5925 } 5926 } 5927 5928 if (!hba->saved_err && !needs_restore) 5929 goto skip_err_handling; 5930 } 5931 5932 hba->silence_err_logs = true; 5933 /* release lock as clear command might sleep */ 5934 spin_unlock_irqrestore(hba->host->host_lock, flags); 5935 /* Clear pending transfer requests */ 5936 for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) { 5937 if (ufshcd_try_to_abort_task(hba, tag)) { 5938 err_xfer = true; 5939 goto lock_skip_pending_xfer_clear; 5940 } 5941 } 5942 5943 /* Clear pending task management requests */ 5944 for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) { 5945 if (ufshcd_clear_tm_cmd(hba, tag)) { 5946 err_tm = true; 5947 goto lock_skip_pending_xfer_clear; 5948 } 5949 } 5950 5951 lock_skip_pending_xfer_clear: 5952 spin_lock_irqsave(hba->host->host_lock, flags); 5953 5954 /* Complete the requests that are cleared by s/w */ 5955 ufshcd_complete_requests(hba); 5956 hba->silence_err_logs = false; 5957 5958 if (err_xfer || err_tm) { 5959 needs_reset = true; 5960 goto do_reset; 5961 } 5962 5963 /* 5964 * After all reqs and tasks are cleared from doorbell, 5965 * now it is safe to 
retore power mode. 5966 */ 5967 if (needs_restore) { 5968 spin_unlock_irqrestore(hba->host->host_lock, flags); 5969 /* 5970 * Hold the scaling lock just in case dev cmds 5971 * are sent via bsg and/or sysfs. 5972 */ 5973 down_write(&hba->clk_scaling_lock); 5974 hba->force_pmc = true; 5975 pmc_err = ufshcd_config_pwr_mode(hba, &(hba->pwr_info)); 5976 if (pmc_err) { 5977 needs_reset = true; 5978 dev_err(hba->dev, "%s: Failed to restore power mode, err = %d\n", 5979 __func__, pmc_err); 5980 } 5981 hba->force_pmc = false; 5982 ufshcd_print_pwr_info(hba); 5983 up_write(&hba->clk_scaling_lock); 5984 spin_lock_irqsave(hba->host->host_lock, flags); 5985 } 5986 5987 do_reset: 5988 /* Fatal errors need reset */ 5989 if (needs_reset) { 5990 unsigned long max_doorbells = (1UL << hba->nutrs) - 1; 5991 5992 /* 5993 * ufshcd_reset_and_restore() does the link reinitialization 5994 * which will need atleast one empty doorbell slot to send the 5995 * device management commands (NOP and query commands). 5996 * If there is no slot empty at this moment then free up last 5997 * slot forcefully. 
5998 */ 5999 if (hba->outstanding_reqs == max_doorbells) 6000 __ufshcd_transfer_req_compl(hba, 6001 (1UL << (hba->nutrs - 1))); 6002 6003 hba->force_reset = false; 6004 spin_unlock_irqrestore(hba->host->host_lock, flags); 6005 err = ufshcd_reset_and_restore(hba); 6006 if (err) 6007 dev_err(hba->dev, "%s: reset and restore failed with err %d\n", 6008 __func__, err); 6009 else 6010 ufshcd_recover_pm_error(hba); 6011 spin_lock_irqsave(hba->host->host_lock, flags); 6012 } 6013 6014 skip_err_handling: 6015 if (!needs_reset) { 6016 if (hba->ufshcd_state == UFSHCD_STATE_RESET) 6017 hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL; 6018 if (hba->saved_err || hba->saved_uic_err) 6019 dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x saved_uic_err 0x%x", 6020 __func__, hba->saved_err, hba->saved_uic_err); 6021 } 6022 ufshcd_clear_eh_in_progress(hba); 6023 spin_unlock_irqrestore(hba->host->host_lock, flags); 6024 ufshcd_scsi_unblock_requests(hba); 6025 ufshcd_err_handling_unprepare(hba); 6026 up(&hba->eh_sem); 6027 6028 if (!err && needs_reset) 6029 ufshcd_clear_ua_wluns(hba); 6030 } 6031 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Hi Jaegeuk, I love your patch! Perhaps something to improve: [auto build test WARNING on scsi/for-next] [also build test WARNING on mkp-scsi/for-next linus/master v5.11-rc2 next-20210104] [cannot apply to linux/master] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Jaegeuk-Kim/scsi-ufs-fix-livelock-of-ufshcd_clear_ua_wluns/20210107-034119 base: https://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git for-next config: x86_64-randconfig-r016-20210106 (attached as .config) compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 5c951623bc8965fa1e89660f2f5f4a2944e4981a) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install x86_64 cross compiling tool for clang build # apt-get install binutils-x86-64-linux-gnu # https://github.com/0day-ci/linux/commit/1ae2226bbc3a8096dfceaab9c598f02d387915ba git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Jaegeuk-Kim/scsi-ufs-fix-livelock-of-ufshcd_clear_ua_wluns/20210107-034119 git checkout 1ae2226bbc3a8096dfceaab9c598f02d387915ba # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): >> drivers/scsi/ufs/ufshcd.c:5923:16: warning: format specifies type 'unsigned int' but the argument has type 'unsigned long' [-Wformat] __func__, hba->outstanding_reqs); ^~~~~~~~~~~~~~~~~~~~~ include/linux/dev_printk.h:112:32: note: expanded from macro 'dev_err' _dev_err(dev, dev_fmt(fmt), ##__VA_ARGS__) ~~~ ^~~~~~~~~~~ 1 warning generated. 
vim +5923 drivers/scsi/ufs/ufshcd.c 5818 5819 /** 5820 * ufshcd_err_handler - handle UFS errors that require s/w attention 5821 * @work: pointer to work structure 5822 */ 5823 static void ufshcd_err_handler(struct work_struct *work) 5824 { 5825 struct ufs_hba *hba; 5826 unsigned long flags; 5827 bool err_xfer = false; 5828 bool err_tm = false; 5829 int err = 0, pmc_err; 5830 int tag; 5831 bool needs_reset = false, needs_restore = false; 5832 5833 hba = container_of(work, struct ufs_hba, eh_work); 5834 5835 down(&hba->eh_sem); 5836 spin_lock_irqsave(hba->host->host_lock, flags); 5837 if (ufshcd_err_handling_should_stop(hba)) { 5838 if (hba->ufshcd_state != UFSHCD_STATE_ERROR) 5839 hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL; 5840 spin_unlock_irqrestore(hba->host->host_lock, flags); 5841 up(&hba->eh_sem); 5842 return; 5843 } 5844 ufshcd_set_eh_in_progress(hba); 5845 spin_unlock_irqrestore(hba->host->host_lock, flags); 5846 ufshcd_err_handling_prepare(hba); 5847 spin_lock_irqsave(hba->host->host_lock, flags); 5848 ufshcd_scsi_block_requests(hba); 5849 hba->ufshcd_state = UFSHCD_STATE_RESET; 5850 5851 /* Complete requests that have door-bell cleared by h/w */ 5852 ufshcd_complete_requests(hba); 5853 5854 /* 5855 * A full reset and restore might have happened after preparation 5856 * is finished, double check whether we should stop. 
5857 */ 5858 if (ufshcd_err_handling_should_stop(hba)) 5859 goto skip_err_handling; 5860 5861 if (hba->dev_quirks & UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) { 5862 bool ret; 5863 5864 spin_unlock_irqrestore(hba->host->host_lock, flags); 5865 /* release the lock as ufshcd_quirk_dl_nac_errors() may sleep */ 5866 ret = ufshcd_quirk_dl_nac_errors(hba); 5867 spin_lock_irqsave(hba->host->host_lock, flags); 5868 if (!ret && ufshcd_err_handling_should_stop(hba)) 5869 goto skip_err_handling; 5870 } 5871 5872 if ((hba->saved_err & (INT_FATAL_ERRORS | UFSHCD_UIC_HIBERN8_MASK)) || 5873 (hba->saved_uic_err && 5874 (hba->saved_uic_err != UFSHCD_UIC_PA_GENERIC_ERROR))) { 5875 bool pr_prdt = !!(hba->saved_err & SYSTEM_BUS_FATAL_ERROR); 5876 5877 spin_unlock_irqrestore(hba->host->host_lock, flags); 5878 ufshcd_print_host_state(hba); 5879 ufshcd_print_pwr_info(hba); 5880 ufshcd_print_evt_hist(hba); 5881 ufshcd_print_tmrs(hba, hba->outstanding_tasks); 5882 ufshcd_print_trs(hba, hba->outstanding_reqs, pr_prdt); 5883 spin_lock_irqsave(hba->host->host_lock, flags); 5884 } 5885 5886 /* 5887 * if host reset is required then skip clearing the pending 5888 * transfers forcefully because they will get cleared during 5889 * host reset and restore 5890 */ 5891 if (hba->force_reset || ufshcd_is_link_broken(hba) || 5892 ufshcd_is_saved_err_fatal(hba) || 5893 ((hba->saved_err & UIC_ERROR) && 5894 (hba->saved_uic_err & (UFSHCD_UIC_DL_NAC_RECEIVED_ERROR | 5895 UFSHCD_UIC_DL_TCx_REPLAY_ERROR)))) { 5896 needs_reset = true; 5897 goto do_reset; 5898 } 5899 5900 /* 5901 * If LINERESET was caught, UFS might have been put to PWM mode, 5902 * check if power mode restore is needed. 
5903 */ 5904 if (hba->saved_uic_err & UFSHCD_UIC_PA_GENERIC_ERROR) { 5905 ktime_t start = ktime_get(); 5906 5907 hba->saved_uic_err &= ~UFSHCD_UIC_PA_GENERIC_ERROR; 5908 if (!hba->saved_uic_err) 5909 hba->saved_err &= ~UIC_ERROR; 5910 spin_unlock_irqrestore(hba->host->host_lock, flags); 5911 if (ufshcd_is_pwr_mode_restore_needed(hba)) 5912 needs_restore = true; 5913 spin_lock_irqsave(hba->host->host_lock, flags); 5914 /* Wait for IO completion to avoid aborting IOs */ 5915 while (hba->outstanding_reqs) { 5916 ufshcd_complete_requests(hba); 5917 spin_unlock_irqrestore(hba->host->host_lock, flags); 5918 schedule(); 5919 spin_lock_irqsave(hba->host->host_lock, flags); 5920 if (ktime_to_ms(ktime_sub(ktime_get(), start)) > 5921 LINERESET_IO_TIMEOUT_MS) { 5922 dev_err(hba->dev, "%s: timeout, outstanding=%x\n", > 5923 __func__, hba->outstanding_reqs); 5924 break; 5925 } 5926 } 5927 5928 if (!hba->saved_err && !needs_restore) 5929 goto skip_err_handling; 5930 } 5931 5932 hba->silence_err_logs = true; 5933 /* release lock as clear command might sleep */ 5934 spin_unlock_irqrestore(hba->host->host_lock, flags); 5935 /* Clear pending transfer requests */ 5936 for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) { 5937 if (ufshcd_try_to_abort_task(hba, tag)) { 5938 err_xfer = true; 5939 goto lock_skip_pending_xfer_clear; 5940 } 5941 } 5942 5943 /* Clear pending task management requests */ 5944 for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) { 5945 if (ufshcd_clear_tm_cmd(hba, tag)) { 5946 err_tm = true; 5947 goto lock_skip_pending_xfer_clear; 5948 } 5949 } 5950 5951 lock_skip_pending_xfer_clear: 5952 spin_lock_irqsave(hba->host->host_lock, flags); 5953 5954 /* Complete the requests that are cleared by s/w */ 5955 ufshcd_complete_requests(hba); 5956 hba->silence_err_logs = false; 5957 5958 if (err_xfer || err_tm) { 5959 needs_reset = true; 5960 goto do_reset; 5961 } 5962 5963 /* 5964 * After all reqs and tasks are cleared from doorbell, 5965 * now it is safe to 
retore power mode. 5966 */ 5967 if (needs_restore) { 5968 spin_unlock_irqrestore(hba->host->host_lock, flags); 5969 /* 5970 * Hold the scaling lock just in case dev cmds 5971 * are sent via bsg and/or sysfs. 5972 */ 5973 down_write(&hba->clk_scaling_lock); 5974 hba->force_pmc = true; 5975 pmc_err = ufshcd_config_pwr_mode(hba, &(hba->pwr_info)); 5976 if (pmc_err) { 5977 needs_reset = true; 5978 dev_err(hba->dev, "%s: Failed to restore power mode, err = %d\n", 5979 __func__, pmc_err); 5980 } 5981 hba->force_pmc = false; 5982 ufshcd_print_pwr_info(hba); 5983 up_write(&hba->clk_scaling_lock); 5984 spin_lock_irqsave(hba->host->host_lock, flags); 5985 } 5986 5987 do_reset: 5988 /* Fatal errors need reset */ 5989 if (needs_reset) { 5990 unsigned long max_doorbells = (1UL << hba->nutrs) - 1; 5991 5992 /* 5993 * ufshcd_reset_and_restore() does the link reinitialization 5994 * which will need atleast one empty doorbell slot to send the 5995 * device management commands (NOP and query commands). 5996 * If there is no slot empty at this moment then free up last 5997 * slot forcefully. 
5998 */ 5999 if (hba->outstanding_reqs == max_doorbells) 6000 __ufshcd_transfer_req_compl(hba, 6001 (1UL << (hba->nutrs - 1))); 6002 6003 hba->force_reset = false; 6004 spin_unlock_irqrestore(hba->host->host_lock, flags); 6005 err = ufshcd_reset_and_restore(hba); 6006 if (err) 6007 dev_err(hba->dev, "%s: reset and restore failed with err %d\n", 6008 __func__, err); 6009 else 6010 ufshcd_recover_pm_error(hba); 6011 spin_lock_irqsave(hba->host->host_lock, flags); 6012 } 6013 6014 skip_err_handling: 6015 if (!needs_reset) { 6016 if (hba->ufshcd_state == UFSHCD_STATE_RESET) 6017 hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL; 6018 if (hba->saved_err || hba->saved_uic_err) 6019 dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x saved_uic_err 0x%x", 6020 __func__, hba->saved_err, hba->saved_uic_err); 6021 } 6022 ufshcd_clear_eh_in_progress(hba); 6023 spin_unlock_irqrestore(hba->host->host_lock, flags); 6024 ufshcd_scsi_unblock_requests(hba); 6025 ufshcd_err_handling_unprepare(hba); 6026 up(&hba->eh_sem); 6027 6028 if (!err && needs_reset) 6029 ufshcd_clear_ua_wluns(hba); 6030 } 6031 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 1678cec08b51..377da8e98d9b 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -44,6 +44,9 @@
 /* Query request timeout */
 #define QUERY_REQ_TIMEOUT 1500 /* 1.5 seconds */
 
+/* Max time to wait for outstanding I/O to complete after a LINERESET */
+#define LINERESET_IO_TIMEOUT_MS (30000) /* 30 sec */
+
 /* Task management command timeout */
 #define TM_CMD_TIMEOUT 100 /* msecs */
 
@@ -5899,6 +5902,8 @@ static void ufshcd_err_handler(struct work_struct *work)
 	 * check if power mode restore is needed.
 	 */
 	if (hba->saved_uic_err & UFSHCD_UIC_PA_GENERIC_ERROR) {
+		ktime_t start = ktime_get();
+
 		hba->saved_uic_err &= ~UFSHCD_UIC_PA_GENERIC_ERROR;
 		if (!hba->saved_uic_err)
 			hba->saved_err &= ~UIC_ERROR;
@@ -5906,6 +5911,20 @@ static void ufshcd_err_handler(struct work_struct *work)
 		if (ufshcd_is_pwr_mode_restore_needed(hba))
 			needs_restore = true;
 		spin_lock_irqsave(hba->host->host_lock, flags);
+		/* Wait for IO completion to avoid aborting IOs */
+		while (hba->outstanding_reqs) {
+			ufshcd_complete_requests(hba);
+			spin_unlock_irqrestore(hba->host->host_lock, flags);
+			schedule();
+			spin_lock_irqsave(hba->host->host_lock, flags);
+			if (ktime_to_ms(ktime_sub(ktime_get(), start)) >
+						LINERESET_IO_TIMEOUT_MS) {
+				dev_err(hba->dev, "%s: timeout, outstanding=%lx\n",
+					__func__, hba->outstanding_reqs);
+				break;
+			}
+		}
+
 		if (!hba->saved_err && !needs_restore)
 			goto skip_err_handling;
 	}
@@ -6302,9 +6321,13 @@ static irqreturn_t ufshcd_intr(int irq, void *__hba)
 		intr_status = ufshcd_readl(hba, REG_INTERRUPT_STATUS);
 	}
 
-	if (enabled_intr_status && retval == IRQ_NONE) {
-		dev_err(hba->dev, "%s: Unhandled interrupt 0x%08x\n",
-					__func__, intr_status);
+	if (enabled_intr_status && retval == IRQ_NONE &&
+				!ufshcd_eh_in_progress(hba)) {
+		dev_err(hba->dev, "%s: Unhandled interrupt 0x%08x (0x%08x, 0x%08x)\n",
+					__func__,
+					intr_status,
+					hba->ufs_stats.last_intr_status,
+					enabled_intr_status);
 		ufshcd_dump_regs(hba, 0, UFSHCI_REG_SPACE_SIZE, "host_regs: ");
 	}
 
@@ -6348,7 +6371,11 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba,
 	 * Even though we use wait_event() which sleeps indefinitely,
 	 * the maximum wait time is bounded by %TM_CMD_TIMEOUT.
 	 */
-	req = blk_get_request(q, REQ_OP_DRV_OUT, BLK_MQ_REQ_RESERVED);
+	req = blk_get_request(q, REQ_OP_DRV_OUT, BLK_MQ_REQ_RESERVED |
+						BLK_MQ_REQ_NOWAIT);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
 	req->end_io_data = &wait;
 	free_slot = req->tag;
 	WARN_ON_ONCE(free_slot < 0 || free_slot >= hba->nutmrs);
@@ -9355,6 +9382,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 
 	hba->tmf_tag_set = (struct blk_mq_tag_set) {
 		.nr_hw_queues = 1,
+		.reserved_tags = 1,
 		.queue_depth = hba->nutmrs,
 		.ops = &ufshcd_tmf_ops,
 		.flags = BLK_MQ_F_NO_SCHED,