diff mbox series

[v1] scsi: ufs: Fix deadlock issue in ufshcd_wait_for_doorbell_clr()

Message ID 20211213230045.492994-1-huobean@gmail.com (mailing list archive)
State Superseded
Headers show
Series [v1] scsi: ufs: Fix deadlock issue in ufshcd_wait_for_doorbell_clr() | expand

Commit Message

Bean Huo Dec. 13, 2021, 11 p.m. UTC
From: Bean Huo <beanhuo@micron.com>

Calling shost_for_each_device() while host->host_lock is held causes a
deadlock, because the iterator acquires host_lock itself; the system then
stalls (see the log below). Fix this by narrowing the scope of the lock.

stalls on CPUs/tasks:
Call trace:
__switch_to+0x120/0x170
0xffff800011643998
Task dump for CPU 5:
task:kworker/u16:2   state:R  running task     stack:    0 pid:   80 ppid:     2 flags:0x0000000a
Workqueue: events_unbound async_run_entry_fn
Call trace:
__switch_to+0x120/0x170
0x0
Task dump for CPU 6:
task:kworker/u16:6   state:R  running task     stack:    0 pid:  164 ppid:     2 flags:0x0000000a
Workqueue: events_unbound async_run_entry_fn
Call trace:
__switch_to+0x120/0x170
0xffff54e7c4429f80
Task dump for CPU 7:
task:kworker/u16:4   state:R  running task     stack:    0 pid:  153 ppid:     2 flags:0x0000000a
Workqueue: events_unbound async_run_entry_fn
Call trace:
__switch_to+0x120/0x170
blk_mq_run_hw_queue+0x34/0x110
blk_mq_sched_insert_request+0xb0/0x120
blk_execute_rq_nowait+0x68/0x88
blk_execute_rq+0x4c/0xd8
__scsi_execute+0xec/0x1d0
scsi_vpd_inquiry+0x84/0xf0
scsi_get_vpd_buf+0x34/0xb8
scsi_attach_vpd+0x34/0x140
scsi_probe_and_add_lun+0xa6c/0xab8
__scsi_scan_target+0x438/0x4f8
scsi_scan_channel+0x6c/0xa8
scsi_scan_host_selected+0xf0/0x150
do_scsi_scan_host+0x88/0x90
scsi_scan_host+0x1b4/0x1d0
ufshcd_async_scan+0x248/0x310
async_run_entry_fn+0x30/0x178
process_one_work+0x1e8/0x368
worker_thread+0x40/0x478
kthread+0x174/0x180
ret_from_fork+0x10/0x20

Fixes: 8d077ede48c1 ("scsi: ufs: Optimize the command queueing code")
Signed-off-by: Bean Huo <beanhuo@micron.com>
---
 drivers/scsi/ufs/ufshcd.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

Comments

Bart Van Assche Dec. 14, 2021, midnight UTC | #1
On 12/13/21 3:00 PM, Bean Huo wrote:
> Call shost_for_each_device() with host->host_lock is held will cause
> a deadlock situation, which will cause the system to stall (the log
> as follow). Fix this issue by narrowing the scope of the lock.

Hi Bean,

As you probably know I do not have access to a test setup that supports clock
scaling. Has the following patch been considered?

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 6d692aae67ce..244eddf0caf8 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -1084,7 +1084,9 @@ static u32 ufshcd_pending_cmds(struct ufs_hba *hba)
  	struct scsi_device *sdev;
  	u32 pending = 0;

-	shost_for_each_device(sdev, hba->host)
+	lockdep_assert_held(hba->host->host_lock);
+
+	__shost_for_each_device(sdev, hba->host)
  		pending += sbitmap_weight(&sdev->budget_map);

  	return pending;

Thanks,

Bart.
John Stultz Dec. 14, 2021, 4:55 a.m. UTC | #2
On Mon, Dec 13, 2021 at 8:15 PM Bart Van Assche <bvanassche@acm.org> wrote:
>
> On 12/13/21 3:00 PM, Bean Huo wrote:
> > Call shost_for_each_device() with host->host_lock is held will cause
> > a deadlock situation, which will cause the system to stall (the log
> > as follow). Fix this issue by narrowing the scope of the lock.
>
> Hi Bean,
>
> As you probably know I do not have access to a test setup that supports clock
> scaling. Has the following patch been considered?
>
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 6d692aae67ce..244eddf0caf8 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -1084,7 +1084,9 @@ static u32 ufshcd_pending_cmds(struct ufs_hba *hba)
>         struct scsi_device *sdev;
>         u32 pending = 0;
>
> -       shost_for_each_device(sdev, hba->host)
> +       lockdep_assert_held(hba->host->host_lock);
> +
> +       __shost_for_each_device(sdev, hba->host)
>                 pending += sbitmap_weight(&sdev->budget_map);

We hit the same issue today as well, and this solution works on db845c.

Reported-by: YongQin Liu <yongqin.liu@linaro.org>
Reported-by: Amit Pundir <amit.pundir@linaro.org>
Tested-by: John Stultz <john.stultz@linaro.org>

thanks
-john
diff mbox series

Patch

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 6dd517267f1b..15333a327b93 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -1099,19 +1099,21 @@  static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
 	ktime_t start;
 
 	ufshcd_hold(hba, false);
-	spin_lock_irqsave(hba->host->host_lock, flags);
 	/*
 	 * Wait for all the outstanding tasks/transfer requests.
 	 * Verify by checking the doorbell registers are clear.
 	 */
 	start = ktime_get();
 	do {
+		spin_lock_irqsave(hba->host->host_lock, flags);
 		if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
 			ret = -EBUSY;
+			spin_unlock_irqrestore(hba->host->host_lock, flags);
 			goto out;
 		}
-
 		tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
+		spin_unlock_irqrestore(hba->host->host_lock, flags);
+
 		tr_pending = ufshcd_pending_cmds(hba);
 		if (!tm_doorbell && !tr_pending) {
 			timeout = false;
@@ -1120,7 +1122,6 @@  static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
 			break;
 		}
 
-		spin_unlock_irqrestore(hba->host->host_lock, flags);
 		schedule();
 		if (ktime_to_us(ktime_sub(ktime_get(), start)) >
 		    wait_timeout_us) {
@@ -1132,7 +1133,6 @@  static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
 			 */
 			do_last_check = true;
 		}
-		spin_lock_irqsave(hba->host->host_lock, flags);
 	} while (tm_doorbell || tr_pending);
 
 	if (timeout) {
@@ -1142,7 +1142,6 @@  static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
 		ret = -EBUSY;
 	}
 out:
-	spin_unlock_irqrestore(hba->host->host_lock, flags);
 	ufshcd_release(hba);
 	return ret;
 }