diff mbox series

ata: libata-scsi: Avoid deadlock on rescan after device resume

Message ID 20230615083326.161875-1-dlemoal@kernel.org (mailing list archive)
State Not Applicable
Headers show
Series ata: libata-scsi: Avoid deadlock on rescan after device resume | expand

Commit Message

Damien Le Moal June 15, 2023, 8:33 a.m. UTC
When an ATA port is resumed from sleep, the port is reset and a power
management request issued to libata EH to reset the port and rescanning
the device(s) attached to the port. Device rescanning is done by
scheduling an ata_scsi_dev_rescan() work, which will execute
scsi_rescan_device().

However, scsi_rescan_device() takes the generic device lock, which is
also taken by dpm_resume() when the SCSI device is resumed as well. If
a device rescan execution starts before the completion of the SCSI
device resume, the rcu locking used to refresh the cached VPD pages of
the device, combined with the generic device locking from
scsi_rescan_device() and from dpm_resume() can cause a deadlock.

Avoid this situation by changing struct ata_port scsi_rescan_task to be
a delayed work instead of a simple work_struct. ata_scsi_dev_rescan() is
modified to check if the SCSI device associated with the ATA device that
must be rescanned is not suspended. If the SCSI device is still
suspended, ata_scsi_dev_rescan() returns early and reschedule itself for
execution after an arbitrary delay of 5ms.

Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Reported-by: Joe Breuer <linux-kernel@jmbreuer.net>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217530
Fixes: a19a93e4c6a9 ("scsi: core: pm: Rely on the device driver core for async power management")
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
---
 drivers/ata/libata-core.c |  3 ++-
 drivers/ata/libata-eh.c   |  2 +-
 drivers/ata/libata-scsi.c | 22 +++++++++++++++++++++-
 include/linux/libata.h    |  2 +-
 4 files changed, 25 insertions(+), 4 deletions(-)

Comments

Damien Le Moal June 15, 2023, 8:35 a.m. UTC | #1
On 6/15/23 17:33, Damien Le Moal wrote:
> When an ATA port is resumed from sleep, the port is reset and a power
> management request issued to libata EH to reset the port and rescanning
> the device(s) attached to the port. Device rescanning is done by
> scheduling an ata_scsi_dev_rescan() work, which will execute
> scsi_rescan_device().
> 
> However, scsi_rescan_device() takes the generic device lock, which is
> also taken by dpm_resume() when the SCSI device is resumed as well. If
> a device rescan execution starts before the completion of the SCSI
> device resume, the rcu locking used to refresh the cached VPD pages of
> the device, combined with the generic device locking from
> scsi_rescan_device() and from dpm_resume() can cause a deadlock.
> 
> Avoid this situation by changing struct ata_port scsi_rescan_task to be
> a delayed work instead of a simple work_struct. ata_scsi_dev_rescan() is
> modified to check if the SCSI device associated with the ATA device that
> must be rescanned is not suspended. If the SCSI device is still
> suspended, ata_scsi_dev_rescan() returns early and reschedule itself for
> execution after an arbitrary delay of 5ms.

Joe, Kai-Heng,

I cannot recreate the issue on my test machines. So it would be great if you
could test this new candidate fix.

It is not exactly pretty, but a real fix needs a lot more work and more testing.
So that will be for later.

> 
> Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
> Reported-by: Joe Breuer <linux-kernel@jmbreuer.net>
> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217530
> Fixes: a19a93e4c6a9 ("scsi: core: pm: Rely on the device driver core for async power management")
> Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
> ---
>  drivers/ata/libata-core.c |  3 ++-
>  drivers/ata/libata-eh.c   |  2 +-
>  drivers/ata/libata-scsi.c | 22 +++++++++++++++++++++-
>  include/linux/libata.h    |  2 +-
>  4 files changed, 25 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
> index 8bf612bdd61a..b4f246f0cac7 100644
> --- a/drivers/ata/libata-core.c
> +++ b/drivers/ata/libata-core.c
> @@ -5348,7 +5348,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
>  
>  	mutex_init(&ap->scsi_scan_mutex);
>  	INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug);
> -	INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
> +	INIT_DELAYED_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
>  	INIT_LIST_HEAD(&ap->eh_done_q);
>  	init_waitqueue_head(&ap->eh_wait_q);
>  	init_completion(&ap->park_req_pending);
> @@ -5954,6 +5954,7 @@ static void ata_port_detach(struct ata_port *ap)
>  	WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED));
>  
>  	cancel_delayed_work_sync(&ap->hotplug_task);
> +	cancel_delayed_work_sync(&ap->scsi_rescan_task);
>  
>   skip_eh:
>  	/* clean up zpodd on port removal */
> diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
> index a6c901811802..6f8d14191593 100644
> --- a/drivers/ata/libata-eh.c
> +++ b/drivers/ata/libata-eh.c
> @@ -2984,7 +2984,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
>  			ehc->i.flags |= ATA_EHI_SETMODE;
>  
>  			/* schedule the scsi_rescan_device() here */
> -			schedule_work(&(ap->scsi_rescan_task));
> +			schedule_delayed_work(&ap->scsi_rescan_task, 0);
>  		} else if (dev->class == ATA_DEV_UNKNOWN &&
>  			   ehc->tries[dev->devno] &&
>  			   ata_class_enabled(ehc->classes[dev->devno])) {
> diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
> index 8ce90284eb34..551077cea4e4 100644
> --- a/drivers/ata/libata-scsi.c
> +++ b/drivers/ata/libata-scsi.c
> @@ -4597,10 +4597,11 @@ int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
>  void ata_scsi_dev_rescan(struct work_struct *work)
>  {
>  	struct ata_port *ap =
> -		container_of(work, struct ata_port, scsi_rescan_task);
> +		container_of(work, struct ata_port, scsi_rescan_task.work);
>  	struct ata_link *link;
>  	struct ata_device *dev;
>  	unsigned long flags;
> +	bool delay_rescan = false;
>  
>  	mutex_lock(&ap->scsi_scan_mutex);
>  	spin_lock_irqsave(ap->lock, flags);
> @@ -4614,6 +4615,21 @@ void ata_scsi_dev_rescan(struct work_struct *work)
>  			if (scsi_device_get(sdev))
>  				continue;
>  
> +			/*
> +			 * If the rescan work was scheduled because of a resume
> +			 * event, the port is already fully resumed, but the
> +			 * SCSI device may not yet be fully resumed. In such
> +			 * case, executing scsi_rescan_device() may cause a
> +			 * deadlock with the PM code on device_lock(). Prevent
> +			 * this by giving up and retrying rescan after a short
> +			 * delay.
> +			 */
> +			delay_rescan = sdev->sdev_gendev.power.is_suspended;
> +			if (delay_rescan) {
> +				scsi_device_put(sdev);
> +				break;
> +			}
> +
>  			spin_unlock_irqrestore(ap->lock, flags);
>  			scsi_rescan_device(&(sdev->sdev_gendev));
>  			scsi_device_put(sdev);
> @@ -4623,4 +4639,8 @@ void ata_scsi_dev_rescan(struct work_struct *work)
>  
>  	spin_unlock_irqrestore(ap->lock, flags);
>  	mutex_unlock(&ap->scsi_scan_mutex);
> +
> +	if (delay_rescan)
> +		schedule_delayed_work(&ap->scsi_rescan_task,
> +				      msecs_to_jiffies(5));
>  }
> diff --git a/include/linux/libata.h b/include/linux/libata.h
> index 311cd93377c7..dd5797fb6305 100644
> --- a/include/linux/libata.h
> +++ b/include/linux/libata.h
> @@ -836,7 +836,7 @@ struct ata_port {
>  
>  	struct mutex		scsi_scan_mutex;
>  	struct delayed_work	hotplug_task;
> -	struct work_struct	scsi_rescan_task;
> +	struct delayed_work	scsi_rescan_task;
>  
>  	unsigned int		hsm_task_state;
>
Hannes Reinecke June 15, 2023, 8:41 a.m. UTC | #2
On 6/15/23 10:33, Damien Le Moal wrote:
> When an ATA port is resumed from sleep, the port is reset and a power
> management request issued to libata EH to reset the port and rescanning
> the device(s) attached to the port. Device rescanning is done by
> scheduling an ata_scsi_dev_rescan() work, which will execute
> scsi_rescan_device().
> 
> However, scsi_rescan_device() takes the generic device lock, which is
> also taken by dpm_resume() when the SCSI device is resumed as well. If
> a device rescan execution starts before the completion of the SCSI
> device resume, the rcu locking used to refresh the cached VPD pages of
> the device, combined with the generic device locking from
> scsi_rescan_device() and from dpm_resume() can cause a deadlock.
> 
> Avoid this situation by changing struct ata_port scsi_rescan_task to be
> a delayed work instead of a simple work_struct. ata_scsi_dev_rescan() is
> modified to check if the SCSI device associated with the ATA device that
> must be rescanned is not suspended. If the SCSI device is still
> suspended, ata_scsi_dev_rescan() returns early and reschedule itself for
> execution after an arbitrary delay of 5ms.
> 
> Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
> Reported-by: Joe Breuer <linux-kernel@jmbreuer.net>
> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217530
> Fixes: a19a93e4c6a9 ("scsi: core: pm: Rely on the device driver core for async power management")
> Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
> ---
>   drivers/ata/libata-core.c |  3 ++-
>   drivers/ata/libata-eh.c   |  2 +-
>   drivers/ata/libata-scsi.c | 22 +++++++++++++++++++++-
>   include/linux/libata.h    |  2 +-
>   4 files changed, 25 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
> index 8bf612bdd61a..b4f246f0cac7 100644
> --- a/drivers/ata/libata-core.c
> +++ b/drivers/ata/libata-core.c
> @@ -5348,7 +5348,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
>   
>   	mutex_init(&ap->scsi_scan_mutex);
>   	INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug);
> -	INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
> +	INIT_DELAYED_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
>   	INIT_LIST_HEAD(&ap->eh_done_q);
>   	init_waitqueue_head(&ap->eh_wait_q);
>   	init_completion(&ap->park_req_pending);
> @@ -5954,6 +5954,7 @@ static void ata_port_detach(struct ata_port *ap)
>   	WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED));
>   
>   	cancel_delayed_work_sync(&ap->hotplug_task);
> +	cancel_delayed_work_sync(&ap->scsi_rescan_task);
>   
>    skip_eh:
>   	/* clean up zpodd on port removal */
> diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
> index a6c901811802..6f8d14191593 100644
> --- a/drivers/ata/libata-eh.c
> +++ b/drivers/ata/libata-eh.c
> @@ -2984,7 +2984,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
>   			ehc->i.flags |= ATA_EHI_SETMODE;
>   
>   			/* schedule the scsi_rescan_device() here */
> -			schedule_work(&(ap->scsi_rescan_task));
> +			schedule_delayed_work(&ap->scsi_rescan_task, 0);
>   		} else if (dev->class == ATA_DEV_UNKNOWN &&
>   			   ehc->tries[dev->devno] &&
>   			   ata_class_enabled(ehc->classes[dev->devno])) {
> diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
> index 8ce90284eb34..551077cea4e4 100644
> --- a/drivers/ata/libata-scsi.c
> +++ b/drivers/ata/libata-scsi.c
> @@ -4597,10 +4597,11 @@ int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
>   void ata_scsi_dev_rescan(struct work_struct *work)
>   {
>   	struct ata_port *ap =
> -		container_of(work, struct ata_port, scsi_rescan_task);
> +		container_of(work, struct ata_port, scsi_rescan_task.work);
>   	struct ata_link *link;
>   	struct ata_device *dev;
>   	unsigned long flags;
> +	bool delay_rescan = false;
>   
>   	mutex_lock(&ap->scsi_scan_mutex);
>   	spin_lock_irqsave(ap->lock, flags);
> @@ -4614,6 +4615,21 @@ void ata_scsi_dev_rescan(struct work_struct *work)
>   			if (scsi_device_get(sdev))
>   				continue;
>   
> +			/*
> +			 * If the rescan work was scheduled because of a resume
> +			 * event, the port is already fully resumed, but the
> +			 * SCSI device may not yet be fully resumed. In such
> +			 * case, executing scsi_rescan_device() may cause a
> +			 * deadlock with the PM code on device_lock(). Prevent
> +			 * this by giving up and retrying rescan after a short
> +			 * delay.
> +			 */
> +			delay_rescan = sdev->sdev_gendev.power.is_suspended;
> +			if (delay_rescan) {
> +				scsi_device_put(sdev);
> +				break;
> +			}
> +
>   			spin_unlock_irqrestore(ap->lock, flags);
>   			scsi_rescan_device(&(sdev->sdev_gendev));
>   			scsi_device_put(sdev);
> @@ -4623,4 +4639,8 @@ void ata_scsi_dev_rescan(struct work_struct *work)
>   
>   	spin_unlock_irqrestore(ap->lock, flags);
>   	mutex_unlock(&ap->scsi_scan_mutex);
> +
> +	if (delay_rescan)
> +		schedule_delayed_work(&ap->scsi_rescan_task,
> +				      msecs_to_jiffies(5));
>   }
> diff --git a/include/linux/libata.h b/include/linux/libata.h
> index 311cd93377c7..dd5797fb6305 100644
> --- a/include/linux/libata.h
> +++ b/include/linux/libata.h
> @@ -836,7 +836,7 @@ struct ata_port {
>   
>   	struct mutex		scsi_scan_mutex;
>   	struct delayed_work	hotplug_task;
> -	struct work_struct	scsi_rescan_task;
> +	struct delayed_work	scsi_rescan_task;
>   
>   	unsigned int		hsm_task_state;
>   

Hehe. That is exactly what I had in mind.

Reviewed-by: Hannes Reinecke <hare@suse.de>

Cheers,

Hannes
Damien Le Moal June 15, 2023, 8:45 a.m. UTC | #3
On 6/15/23 17:41, Hannes Reinecke wrote:
> On 6/15/23 10:33, Damien Le Moal wrote:
>> When an ATA port is resumed from sleep, the port is reset and a power
>> management request issued to libata EH to reset the port and rescanning
>> the device(s) attached to the port. Device rescanning is done by
>> scheduling an ata_scsi_dev_rescan() work, which will execute
>> scsi_rescan_device().
>>
>> However, scsi_rescan_device() takes the generic device lock, which is
>> also taken by dpm_resume() when the SCSI device is resumed as well. If
>> a device rescan execution starts before the completion of the SCSI
>> device resume, the rcu locking used to refresh the cached VPD pages of
>> the device, combined with the generic device locking from
>> scsi_rescan_device() and from dpm_resume() can cause a deadlock.
>>
>> Avoid this situation by changing struct ata_port scsi_rescan_task to be
>> a delayed work instead of a simple work_struct. ata_scsi_dev_rescan() is
>> modified to check if the SCSI device associated with the ATA device that
>> must be rescanned is not suspended. If the SCSI device is still
>> suspended, ata_scsi_dev_rescan() returns early and reschedule itself for
>> execution after an arbitrary delay of 5ms.
>>
>> Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
>> Reported-by: Joe Breuer <linux-kernel@jmbreuer.net>
>> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217530
>> Fixes: a19a93e4c6a9 ("scsi: core: pm: Rely on the device driver core for async power management")
>> Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
>> ---
>>   drivers/ata/libata-core.c |  3 ++-
>>   drivers/ata/libata-eh.c   |  2 +-
>>   drivers/ata/libata-scsi.c | 22 +++++++++++++++++++++-
>>   include/linux/libata.h    |  2 +-
>>   4 files changed, 25 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
>> index 8bf612bdd61a..b4f246f0cac7 100644
>> --- a/drivers/ata/libata-core.c
>> +++ b/drivers/ata/libata-core.c
>> @@ -5348,7 +5348,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
>>   
>>   	mutex_init(&ap->scsi_scan_mutex);
>>   	INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug);
>> -	INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
>> +	INIT_DELAYED_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
>>   	INIT_LIST_HEAD(&ap->eh_done_q);
>>   	init_waitqueue_head(&ap->eh_wait_q);
>>   	init_completion(&ap->park_req_pending);
>> @@ -5954,6 +5954,7 @@ static void ata_port_detach(struct ata_port *ap)
>>   	WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED));
>>   
>>   	cancel_delayed_work_sync(&ap->hotplug_task);
>> +	cancel_delayed_work_sync(&ap->scsi_rescan_task);
>>   
>>    skip_eh:
>>   	/* clean up zpodd on port removal */
>> diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
>> index a6c901811802..6f8d14191593 100644
>> --- a/drivers/ata/libata-eh.c
>> +++ b/drivers/ata/libata-eh.c
>> @@ -2984,7 +2984,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
>>   			ehc->i.flags |= ATA_EHI_SETMODE;
>>   
>>   			/* schedule the scsi_rescan_device() here */
>> -			schedule_work(&(ap->scsi_rescan_task));
>> +			schedule_delayed_work(&ap->scsi_rescan_task, 0);
>>   		} else if (dev->class == ATA_DEV_UNKNOWN &&
>>   			   ehc->tries[dev->devno] &&
>>   			   ata_class_enabled(ehc->classes[dev->devno])) {
>> diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
>> index 8ce90284eb34..551077cea4e4 100644
>> --- a/drivers/ata/libata-scsi.c
>> +++ b/drivers/ata/libata-scsi.c
>> @@ -4597,10 +4597,11 @@ int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
>>   void ata_scsi_dev_rescan(struct work_struct *work)
>>   {
>>   	struct ata_port *ap =
>> -		container_of(work, struct ata_port, scsi_rescan_task);
>> +		container_of(work, struct ata_port, scsi_rescan_task.work);
>>   	struct ata_link *link;
>>   	struct ata_device *dev;
>>   	unsigned long flags;
>> +	bool delay_rescan = false;
>>   
>>   	mutex_lock(&ap->scsi_scan_mutex);
>>   	spin_lock_irqsave(ap->lock, flags);
>> @@ -4614,6 +4615,21 @@ void ata_scsi_dev_rescan(struct work_struct *work)
>>   			if (scsi_device_get(sdev))
>>   				continue;
>>   
>> +			/*
>> +			 * If the rescan work was scheduled because of a resume
>> +			 * event, the port is already fully resumed, but the
>> +			 * SCSI device may not yet be fully resumed. In such
>> +			 * case, executing scsi_rescan_device() may cause a
>> +			 * deadlock with the PM code on device_lock(). Prevent
>> +			 * this by giving up and retrying rescan after a short
>> +			 * delay.
>> +			 */
>> +			delay_rescan = sdev->sdev_gendev.power.is_suspended;
>> +			if (delay_rescan) {
>> +				scsi_device_put(sdev);
>> +				break;
>> +			}
>> +
>>   			spin_unlock_irqrestore(ap->lock, flags);
>>   			scsi_rescan_device(&(sdev->sdev_gendev));
>>   			scsi_device_put(sdev);
>> @@ -4623,4 +4639,8 @@ void ata_scsi_dev_rescan(struct work_struct *work)
>>   
>>   	spin_unlock_irqrestore(ap->lock, flags);
>>   	mutex_unlock(&ap->scsi_scan_mutex);
>> +
>> +	if (delay_rescan)
>> +		schedule_delayed_work(&ap->scsi_rescan_task,
>> +				      msecs_to_jiffies(5));
>>   }
>> diff --git a/include/linux/libata.h b/include/linux/libata.h
>> index 311cd93377c7..dd5797fb6305 100644
>> --- a/include/linux/libata.h
>> +++ b/include/linux/libata.h
>> @@ -836,7 +836,7 @@ struct ata_port {
>>   
>>   	struct mutex		scsi_scan_mutex;
>>   	struct delayed_work	hotplug_task;
>> -	struct work_struct	scsi_rescan_task;
>> +	struct delayed_work	scsi_rescan_task;
>>   
>>   	unsigned int		hsm_task_state;
>>   
> 
> Hehe. That is exactly what I had in mind.

The direct test of "is_suspended" is not exactly pretty. But there are a few
drivers doing it... So, it will have to do for now.

> 
> Reviewed-by: Hannes Reinecke <hare@suse.de>
> 
> Cheers,
> 
> Hannes
>
Alan Stern June 15, 2023, 2:50 p.m. UTC | #4
On Thu, Jun 15, 2023 at 05:33:26PM +0900, Damien Le Moal wrote:
> When an ATA port is resumed from sleep, the port is reset and a power
> management request issued to libata EH to reset the port and rescanning
> the device(s) attached to the port. Device rescanning is done by
> scheduling an ata_scsi_dev_rescan() work, which will execute
> scsi_rescan_device().
> 
> However, scsi_rescan_device() takes the generic device lock, which is
> also taken by dpm_resume() when the SCSI device is resumed as well. If
> a device rescan execution starts before the completion of the SCSI
> device resume, the rcu locking used to refresh the cached VPD pages of
> the device, combined with the generic device locking from
> scsi_rescan_device() and from dpm_resume() can cause a deadlock.
> 
> Avoid this situation by changing struct ata_port scsi_rescan_task to be
> a delayed work instead of a simple work_struct. ata_scsi_dev_rescan() is
> modified to check if the SCSI device associated with the ATA device that
> must be rescanned is not suspended. If the SCSI device is still
> suspended, ata_scsi_dev_rescan() returns early and reschedule itself for
> execution after an arbitrary delay of 5ms.

I don't understand the nature of the relationship between the ATA port
and the corresponding SCSI device.  Maybe you could explain it more
fully, if you have time.

But in any case, this approach seems like a layering violation.  Why not 
instead call a SCSI utility routine to set a "needs_rescan" flag in the 
scsi_device structure?  Then scsi_device_resume() could automatically 
call scsi_rescan_device() -- or rather an internal version that assumes 
the device lock is already held -- if the flag is set.  Or it could 
queue a non-delayed work routine to do this.  (Is it important to have 
the rescan finish before userspace starts up and tries to access the ATA 
device again?)

That, combined with a guaranteed order of resuming, would do what you 
want, right?

Alan Stern
Kai-Heng Feng June 16, 2023, 3:31 a.m. UTC | #5
On Thu, Jun 15, 2023 at 4:33 PM Damien Le Moal <dlemoal@kernel.org> wrote:
>
> When an ATA port is resumed from sleep, the port is reset and a power
> management request issued to libata EH to reset the port and rescanning
> the device(s) attached to the port. Device rescanning is done by
> scheduling an ata_scsi_dev_rescan() work, which will execute
> scsi_rescan_device().
>
> However, scsi_rescan_device() takes the generic device lock, which is
> also taken by dpm_resume() when the SCSI device is resumed as well. If
> a device rescan execution starts before the completion of the SCSI
> device resume, the rcu locking used to refresh the cached VPD pages of
> the device, combined with the generic device locking from
> scsi_rescan_device() and from dpm_resume() can cause a deadlock.
>
> Avoid this situation by changing struct ata_port scsi_rescan_task to be
> a delayed work instead of a simple work_struct. ata_scsi_dev_rescan() is
> modified to check if the SCSI device associated with the ATA device that
> must be rescanned is not suspended. If the SCSI device is still
> suspended, ata_scsi_dev_rescan() returns early and reschedule itself for
> execution after an arbitrary delay of 5ms.
>
> Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
> Reported-by: Joe Breuer <linux-kernel@jmbreuer.net>
> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217530
> Fixes: a19a93e4c6a9 ("scsi: core: pm: Rely on the device driver core for async power management")
> Signed-off-by: Damien Le Moal <dlemoal@kernel.org>

Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>

> ---
>  drivers/ata/libata-core.c |  3 ++-
>  drivers/ata/libata-eh.c   |  2 +-
>  drivers/ata/libata-scsi.c | 22 +++++++++++++++++++++-
>  include/linux/libata.h    |  2 +-
>  4 files changed, 25 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
> index 8bf612bdd61a..b4f246f0cac7 100644
> --- a/drivers/ata/libata-core.c
> +++ b/drivers/ata/libata-core.c
> @@ -5348,7 +5348,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
>
>         mutex_init(&ap->scsi_scan_mutex);
>         INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug);
> -       INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
> +       INIT_DELAYED_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
>         INIT_LIST_HEAD(&ap->eh_done_q);
>         init_waitqueue_head(&ap->eh_wait_q);
>         init_completion(&ap->park_req_pending);
> @@ -5954,6 +5954,7 @@ static void ata_port_detach(struct ata_port *ap)
>         WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED));
>
>         cancel_delayed_work_sync(&ap->hotplug_task);
> +       cancel_delayed_work_sync(&ap->scsi_rescan_task);
>
>   skip_eh:
>         /* clean up zpodd on port removal */
> diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
> index a6c901811802..6f8d14191593 100644
> --- a/drivers/ata/libata-eh.c
> +++ b/drivers/ata/libata-eh.c
> @@ -2984,7 +2984,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
>                         ehc->i.flags |= ATA_EHI_SETMODE;
>
>                         /* schedule the scsi_rescan_device() here */
> -                       schedule_work(&(ap->scsi_rescan_task));
> +                       schedule_delayed_work(&ap->scsi_rescan_task, 0);
>                 } else if (dev->class == ATA_DEV_UNKNOWN &&
>                            ehc->tries[dev->devno] &&
>                            ata_class_enabled(ehc->classes[dev->devno])) {
> diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
> index 8ce90284eb34..551077cea4e4 100644
> --- a/drivers/ata/libata-scsi.c
> +++ b/drivers/ata/libata-scsi.c
> @@ -4597,10 +4597,11 @@ int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
>  void ata_scsi_dev_rescan(struct work_struct *work)
>  {
>         struct ata_port *ap =
> -               container_of(work, struct ata_port, scsi_rescan_task);
> +               container_of(work, struct ata_port, scsi_rescan_task.work);
>         struct ata_link *link;
>         struct ata_device *dev;
>         unsigned long flags;
> +       bool delay_rescan = false;
>
>         mutex_lock(&ap->scsi_scan_mutex);
>         spin_lock_irqsave(ap->lock, flags);
> @@ -4614,6 +4615,21 @@ void ata_scsi_dev_rescan(struct work_struct *work)
>                         if (scsi_device_get(sdev))
>                                 continue;
>
> +                       /*
> +                        * If the rescan work was scheduled because of a resume
> +                        * event, the port is already fully resumed, but the
> +                        * SCSI device may not yet be fully resumed. In such
> +                        * case, executing scsi_rescan_device() may cause a
> +                        * deadlock with the PM code on device_lock(). Prevent
> +                        * this by giving up and retrying rescan after a short
> +                        * delay.
> +                        */
> +                       delay_rescan = sdev->sdev_gendev.power.is_suspended;
> +                       if (delay_rescan) {
> +                               scsi_device_put(sdev);
> +                               break;
> +                       }
> +
>                         spin_unlock_irqrestore(ap->lock, flags);
>                         scsi_rescan_device(&(sdev->sdev_gendev));
>                         scsi_device_put(sdev);
> @@ -4623,4 +4639,8 @@ void ata_scsi_dev_rescan(struct work_struct *work)
>
>         spin_unlock_irqrestore(ap->lock, flags);
>         mutex_unlock(&ap->scsi_scan_mutex);
> +
> +       if (delay_rescan)
> +               schedule_delayed_work(&ap->scsi_rescan_task,
> +                                     msecs_to_jiffies(5));
>  }
> diff --git a/include/linux/libata.h b/include/linux/libata.h
> index 311cd93377c7..dd5797fb6305 100644
> --- a/include/linux/libata.h
> +++ b/include/linux/libata.h
> @@ -836,7 +836,7 @@ struct ata_port {
>
>         struct mutex            scsi_scan_mutex;
>         struct delayed_work     hotplug_task;
> -       struct work_struct      scsi_rescan_task;
> +       struct delayed_work     scsi_rescan_task;
>
>         unsigned int            hsm_task_state;
>
> --
> 2.40.1
>
Kai-Heng Feng June 16, 2023, 3:32 a.m. UTC | #6
On Thu, Jun 15, 2023 at 10:50 PM Alan Stern <stern@rowland.harvard.edu> wrote:
>
> On Thu, Jun 15, 2023 at 05:33:26PM +0900, Damien Le Moal wrote:
> > When an ATA port is resumed from sleep, the port is reset and a power
> > management request issued to libata EH to reset the port and rescanning
> > the device(s) attached to the port. Device rescanning is done by
> > scheduling an ata_scsi_dev_rescan() work, which will execute
> > scsi_rescan_device().
> >
> > However, scsi_rescan_device() takes the generic device lock, which is
> > also taken by dpm_resume() when the SCSI device is resumed as well. If
> > a device rescan execution starts before the completion of the SCSI
> > device resume, the rcu locking used to refresh the cached VPD pages of
> > the device, combined with the generic device locking from
> > scsi_rescan_device() and from dpm_resume() can cause a deadlock.
> >
> > Avoid this situation by changing struct ata_port scsi_rescan_task to be
> > a delayed work instead of a simple work_struct. ata_scsi_dev_rescan() is
> > modified to check if the SCSI device associated with the ATA device that
> > must be rescanned is not suspended. If the SCSI device is still
> > suspended, ata_scsi_dev_rescan() returns early and reschedule itself for
> > execution after an arbitrary delay of 5ms.
>
> I don't understand the nature of the relationship between the ATA port
> and the corresponding SCSI device.  Maybe you could explain it more
> fully, if you have time.
>
> But in any case, this approach seems like a layering violation.  Why not
> instead call a SCSI utility routine to set a "needs_rescan" flag in the
> scsi_device structure?  Then scsi_device_resume() could automatically
> call scsi_rescan_device() -- or rather an internal version that assumes
> the device lock is already held -- if the flag is set.  Or it could
> queue a non-delayed work routine to do this.  (Is it important to have
> the rescan finish before userspace starts up and tries to access the ATA
> device again?)
>
> That, combined with a guaranteed order of resuming, would do what you
> want, right?

What you are suggesting is pretty much like my previous approach:
https://lore.kernel.org/all/20230502150435.423770-2-kai.heng.feng@canonical.com/

Kai-Heng

>
> Alan Stern
Damien Le Moal June 16, 2023, 5:25 a.m. UTC | #7
On 6/15/23 23:50, Alan Stern wrote:
> On Thu, Jun 15, 2023 at 05:33:26PM +0900, Damien Le Moal wrote:
>> When an ATA port is resumed from sleep, the port is reset and a power
>> management request issued to libata EH to reset the port and rescanning
>> the device(s) attached to the port. Device rescanning is done by
>> scheduling an ata_scsi_dev_rescan() work, which will execute
>> scsi_rescan_device().
>>
>> However, scsi_rescan_device() takes the generic device lock, which is
>> also taken by dpm_resume() when the SCSI device is resumed as well. If
>> a device rescan execution starts before the completion of the SCSI
>> device resume, the rcu locking used to refresh the cached VPD pages of
>> the device, combined with the generic device locking from
>> scsi_rescan_device() and from dpm_resume() can cause a deadlock.
>>
>> Avoid this situation by changing struct ata_port scsi_rescan_task to be
>> a delayed work instead of a simple work_struct. ata_scsi_dev_rescan() is
>> modified to check if the SCSI device associated with the ATA device that
>> must be rescanned is not suspended. If the SCSI device is still
>> suspended, ata_scsi_dev_rescan() returns early and reschedule itself for
>> execution after an arbitrary delay of 5ms.
> 
> I don't understand the nature of the relationship between the ATA port
> and the corresponding SCSI device.  Maybe you could explain it more
> fully, if you have time.

For ata devices, the parent -> child relationship is:

ata_host (the adapter) -> ata_port -> ata_link -> ata_device (HDD, SSD or ATAPI
CD/DVD)

For scsi devices representing ATA devices, it is:

ata_port -> scsi_host -> scsi_target -> scsi_device -> scsi_disk (or gendisk for
a CD/DVD)

When devices are scanned, libata will create ports and create a scsi_host for
each port, and a scsi device for each ata_device found on the link(s) for the
port. There is no direct relationship between an ata_device (the HDD or SSD) and
its scsi_device/scsi_disk (the device used to issue commands). The PM operations
we have are for ata_port and scsi_device. For the scsi device, the operations
are actually defined per device type, so in the scsi_disk driver (sd) for HDDs
and SSDs.

> But in any case, this approach seems like a layering violation.  Why not 

The layering violation is I think only with the direct reference to the scsi
device power.is_suspended field, which is definitely not pretty. But there are
some other drivers doing something similar:

$ git grep "power\.is_suspended" | grep -v drivers/base/power/main.c
drivers/gpu/drm/i915/display/intel_display_power_well.c:	if
(!dev_priv->drm.dev->power.is_suspended)
drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c:	if
(!dwmac->dev->power.is_suspended) {
drivers/platform/surface/surface_acpi_notify.c:	if (d->dev->power.is_suspended) {

All the other code (ata calling scsi) is normal per the SCSI-to-ata translation
needed (all ata devices are represented as scsi devices in Linux, following the
SAT=scsi ATA translation specifications).

> instead call a SCSI utility routine to set a "needs_rescan" flag in the 
> scsi_device structure?  Then scsi_device_resume() could automatically 
> call scsi_rescan_device() -- or rather an internal version that assumes 
> the device lock is already held -- if the flag is set.  Or it could 

Yes, ideally, that is what we should do. Such fix is however more involved, and
so I prefer not to push for this right now as a fix for the regression at hand.
But I will definitively look into this.

> queue a non-delayed work routine to do this.  (Is it important to have 
> the rescan finish before userspace starts up and tries to access the ATA 
> device again?)
> 
> That, combined with a guaranteed order of resuming, would do what you 
> want, right?

Yes. But again more fixes needed:
1) libata uses its error handling path to reset a port on resume and probe the
links again. The devices found are then revalidated (IDENTIFY command, reading
log pages etc). This call to EH is triggered in the pm->resume operation, but EH
runs as an asynchronous task. So the port->resume may complete before EH is
done. We need to change the EH call to be synchronous in ->resume
2) We need to remove triggering the task that does scsi_rescan_device() in EH
and move that trigger to scsi_device ->resume.
3) Modify scsi_device ->resume() to call scsi_rescan_device()

Safely doing (3) requires synchronization between ata_port->resume and
scsi_device->resume. We can do that by adding a device link between ata_device
and scsi_device. Doing so, the scsi device becomes the grandchild of the
ata_port and we are guaranteed that its ->resume will happen only once the ata
port ->resume is done.

That will also improve things as we will be able to rescan the scsi device (and
thus catch any change to the device) *before* the device ->resume operation
re-enables issuing user commands by un-quiescing the device request queue.

As you can see, that is all beyond a quick fix for a regression... Will work on
this.

Cheers.

> 
> Alan Stern
Damien Le Moal June 16, 2023, 5:28 a.m. UTC | #8
On 6/16/23 12:32, Kai-Heng Feng wrote:
> On Thu, Jun 15, 2023 at 10:50 PM Alan Stern <stern@rowland.harvard.edu> wrote:
>>
>> On Thu, Jun 15, 2023 at 05:33:26PM +0900, Damien Le Moal wrote:
>>> When an ATA port is resumed from sleep, the port is reset and a power
>>> management request issued to libata EH to reset the port and rescanning
>>> the device(s) attached to the port. Device rescanning is done by
>>> scheduling an ata_scsi_dev_rescan() work, which will execute
>>> scsi_rescan_device().
>>>
>>> However, scsi_rescan_device() takes the generic device lock, which is
>>> also taken by dpm_resume() when the SCSI device is resumed as well. If
>>> a device rescan execution starts before the completion of the SCSI
>>> device resume, the rcu locking used to refresh the cached VPD pages of
>>> the device, combined with the generic device locking from
>>> scsi_rescan_device() and from dpm_resume() can cause a deadlock.
>>>
>>> Avoid this situation by changing struct ata_port scsi_rescan_task to be
>>> a delayed work instead of a simple work_struct. ata_scsi_dev_rescan() is
>>> modified to check if the SCSI device associated with the ATA device that
>>> must be rescanned is not suspended. If the SCSI device is still
>>> suspended, ata_scsi_dev_rescan() returns early and reschedule itself for
>>> execution after an arbitrary delay of 5ms.
>>
>> I don't understand the nature of the relationship between the ATA port
>> and the corresponding SCSI device.  Maybe you could explain it more
>> fully, if you have time.
>>
>> But in any case, this approach seems like a layering violation.  Why not
>> instead call a SCSI utility routine to set a "needs_rescan" flag in the
>> scsi_device structure?  Then scsi_device_resume() could automatically
>> call scsi_rescan_device() -- or rather an internal version that assumes
>> the device lock is already held -- if the flag is set.  Or it could
>> queue a non-delayed work routine to do this.  (Is it important to have
>> the rescan finish before userspace starts up and tries to access the ATA
>> device again?)
>>
>> That, combined with a guaranteed order of resuming, would do what you
>> want, right?
> 
> What you are suggesting is pretty much like my previous approach:
> https://lore.kernel.org/all/20230502150435.423770-2-kai.heng.feng@canonical.com/

Not really. We need more than what you did. See my reply to Alan.
Your solution is rather similar to what I did but it was delaying the rescan
after the entire system is resumed (pm_suspend_target_state != PM_SUSPEND_ON),
which is really a heavy hammer and would significantly slow down resuming.

> 
> Kai-Heng
> 
>>
>> Alan Stern
Damien Le Moal June 16, 2023, 5:28 a.m. UTC | #9
On 6/16/23 12:31, Kai-Heng Feng wrote:
> On Thu, Jun 15, 2023 at 4:33 PM Damien Le Moal <dlemoal@kernel.org> wrote:
>>
>> When an ATA port is resumed from sleep, the port is reset and a power
>> management request issued to libata EH to reset the port and rescanning
>> the device(s) attached to the port. Device rescanning is done by
>> scheduling an ata_scsi_dev_rescan() work, which will execute
>> scsi_rescan_device().
>>
>> However, scsi_rescan_device() takes the generic device lock, which is
>> also taken by dpm_resume() when the SCSI device is resumed as well. If
>> a device rescan execution starts before the completion of the SCSI
>> device resume, the rcu locking used to refresh the cached VPD pages of
>> the device, combined with the generic device locking from
>> scsi_rescan_device() and from dpm_resume() can cause a deadlock.
>>
>> Avoid this situation by changing struct ata_port scsi_rescan_task to be
>> a delayed work instead of a simple work_struct. ata_scsi_dev_rescan() is
>> modified to check if the SCSI device associated with the ATA device that
>> must be rescanned is not suspended. If the SCSI device is still
>> suspended, ata_scsi_dev_rescan() returns early and reschedule itself for
>> execution after an arbitrary delay of 5ms.
>>
>> Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
>> Reported-by: Joe Breuer <linux-kernel@jmbreuer.net>
>> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217530
>> Fixes: a19a93e4c6a9 ("scsi: core: pm: Rely on the device driver core for async power management")
>> Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
> 
> Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>

Thanks !

Joe,

Did you have a chance to test ?
Alan Stern June 16, 2023, 2:25 p.m. UTC | #10
On Fri, Jun 16, 2023 at 02:25:38PM +0900, Damien Le Moal wrote:
> On 6/15/23 23:50, Alan Stern wrote:
> > On Thu, Jun 15, 2023 at 05:33:26PM +0900, Damien Le Moal wrote:
> >> When an ATA port is resumed from sleep, the port is reset and a power
> >> management request issued to libata EH to reset the port and rescanning
> >> the device(s) attached to the port. Device rescanning is done by
> >> scheduling an ata_scsi_dev_rescan() work, which will execute
> >> scsi_rescan_device().
> >>
> >> However, scsi_rescan_device() takes the generic device lock, which is
> >> also taken by dpm_resume() when the SCSI device is resumed as well. If
> >> a device rescan execution starts before the completion of the SCSI
> >> device resume, the rcu locking used to refresh the cached VPD pages of
> >> the device, combined with the generic device locking from
> >> scsi_rescan_device() and from dpm_resume() can cause a deadlock.
> >>
> >> Avoid this situation by changing struct ata_port scsi_rescan_task to be
> >> a delayed work instead of a simple work_struct. ata_scsi_dev_rescan() is
> >> modified to check if the SCSI device associated with the ATA device that
> >> must be rescanned is not suspended. If the SCSI device is still
> >> suspended, ata_scsi_dev_rescan() returns early and reschedule itself for
> >> execution after an arbitrary delay of 5ms.
> > 
> > I don't understand the nature of the relationship between the ATA port
> > and the corresponding SCSI device.  Maybe you could explain it more
> > fully, if you have time.
> 
> For ata devices, the parent -> child relationship is:
> 
> ata_host (the adapter) -> ata_port -> ata_link -> ata_device (HDD, SSD or ATAPI
> CD/DVD)
> 
> For scsi devices representing ATA devices, it is:
> 
> ata_port -> scsi_host -> scsi_target -> scsi_device -> scsi_disk (or gendisk for
> a CD/DVD)
> 
> When devices are scanned, libata will create ports and create a scsi_host for
> each port, and a scsi device for each ata_device found on the link(s) for the
> port. There is no direct relationship between an ata_device (the HDD or SSD) and
> its scsi_device/scsi_disk (the device used to issue commands). The PM operations
> we have are for ata_port and scsi_device. For the scsi device, the operations
> are actually defined per device type, so in the scsi_disk driver (sd) for HDDs
> and SSDs.

Thanks for the explanation.  So there are two device structures in the 
kernel representing the same physical piece of hardware.  No wonder you 
sometimes run into trouble!

On a more positive note, it sounds like each ata_device is always 
created and registered before the corresponding scsi_device, which means 
that synchronous suspends and resumes are guaranteed to work the way you 
want.

> > But in any case, this approach seems like a layering violation.  Why not 
> 
> The layering violation is I think only with the direct reference to the scsi
> device power.is_suspended field, which is definitely not pretty. But there are
> some other drivers doing something similar:
> 
> $ git grep "power\.is_suspended" | grep -v drivers/base/power/main.c
> drivers/gpu/drm/i915/display/intel_display_power_well.c:	if
> (!dev_priv->drm.dev->power.is_suspended)
> drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c:	if
> (!dwmac->dev->power.is_suspended) {
> drivers/platform/surface/surface_acpi_notify.c:	if (d->dev->power.is_suspended) {
> 
> All the other code (ata calling scsi) is normal per the SCSI-to-ata translation
> needed (all ata devices are represented as scsi devices in Linux, following the
> SAT=scsi ATA translation specifications).
> 
> > instead call a SCSI utility routine to set a "needs_rescan" flag in the 
> > scsi_device structure?  Then scsi_device_resume() could automatically 
> > call scsi_rescan_device() -- or rather an internal version that assumes 
> > the device lock is already held -- if the flag is set.  Or it could 
> 
> Yes, ideally, that is what we should do. Such fix is however more involved, and
> so I prefer not to push for this right now as a fix for the regression at hand.
> But I will definitively look into this.
> 
> > queue a non-delayed work routine to do this.  (Is it important to have 
> > the rescan finish before userspace starts up and tries to access the ATA 
> > device again?)
> > 
> > That, combined with a guaranteed order of resuming, would do what you 
> > want, right?
> 
> Yes. But again more fixes needed:
> 1) libata uses its error handling path to reset a port on resume and probe the
> links again. The devices found are then revalidated (IDENTIFY command, reading
> log pages etc). This call to EH is triggered in the pm->resume operation, but EH
> runs as an asynchronous task. So the port->resume may complete before EH is
> done. We need to change the EH call to be synchronous in ->resume
> 2) We need to remove triggering the task that does scsi_rescan_device() in EH
> and move that trigger to scsi_device ->resume.
> 3) Modify scsi_device ->resume() to call scsi_rescan_device()
> 
> Safely doing (3) requires synchronization between ata_port->resume and
> scsi_device->resume. We can do that by adding a device link between ata_device
> and scsi_device. Doing so, the scsi device becomes the grandchild of the
> ata_port and we are guaranteed that its ->resume will happen only once the ata
> port ->resume is done.

That change should be made in any case.  SCSI device drivers assume they 
can send PM-related commands to the device as part of carrying out the 
resume procedure.  (I don't remember whether they actually do so, but 
they are allowed to make this assumption.)  Obviously such commands 
won't work unless the ata_port is fully resumed first.

> That will also improve things as we will be able to rescan the scsi device (and
> thus catch any change to the device) *before* the device ->resume operation
> re-enables issuing user commands by un-quiescing the device request queue.

Just so.

> As you can see, that is all beyond a quick fix for a regression... Will work on
> this.

It seems like a good plan of approach.

Alan Stern
Joe Breuer June 17, 2023, 6:55 a.m. UTC | #11
On 15.06.23 10:35, Damien Le Moal wrote:
> On 6/15/23 17:33, Damien Le Moal wrote:
> Joe, Kai-Heng,
> 
> I cannot recreate the issue on my test machines. So it would be great if you
> could test this new candidate fix.

Thank you for looking into this!

I've tried this patch against 6.3.4 on the affected machine, and it 
looks like a full success. Suspend/resume work without any deadlocks, 
and the optical drive stays usable after resuming.

Since the patch and discussion around it mentions async vs 'guaranteed 
order' power management: My original tests (without any patch) were with 
both pm_async=0 and pm_async=1, no difference on my affected system.
Also, with pm_trace=1, which is documented to imply pm_async=0.

>> Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
>> Reported-by: Joe Breuer <linux-kernel@jmbreuer.net>
>> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217530
>> Fixes: a19a93e4c6a9 ("scsi: core: pm: Rely on the device driver core for async power management")
>> Signed-off-by: Damien Le Moal <dlemoal@kernel.org>

Tested-by: <linux-kernel@jmbreuer.net>
diff mbox series

Patch

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 8bf612bdd61a..b4f246f0cac7 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -5348,7 +5348,7 @@  struct ata_port *ata_port_alloc(struct ata_host *host)
 
 	mutex_init(&ap->scsi_scan_mutex);
 	INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug);
-	INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
+	INIT_DELAYED_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
 	INIT_LIST_HEAD(&ap->eh_done_q);
 	init_waitqueue_head(&ap->eh_wait_q);
 	init_completion(&ap->park_req_pending);
@@ -5954,6 +5954,7 @@  static void ata_port_detach(struct ata_port *ap)
 	WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED));
 
 	cancel_delayed_work_sync(&ap->hotplug_task);
+	cancel_delayed_work_sync(&ap->scsi_rescan_task);
 
  skip_eh:
 	/* clean up zpodd on port removal */
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index a6c901811802..6f8d14191593 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -2984,7 +2984,7 @@  static int ata_eh_revalidate_and_attach(struct ata_link *link,
 			ehc->i.flags |= ATA_EHI_SETMODE;
 
 			/* schedule the scsi_rescan_device() here */
-			schedule_work(&(ap->scsi_rescan_task));
+			schedule_delayed_work(&ap->scsi_rescan_task, 0);
 		} else if (dev->class == ATA_DEV_UNKNOWN &&
 			   ehc->tries[dev->devno] &&
 			   ata_class_enabled(ehc->classes[dev->devno])) {
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 8ce90284eb34..551077cea4e4 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -4597,10 +4597,11 @@  int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
 void ata_scsi_dev_rescan(struct work_struct *work)
 {
 	struct ata_port *ap =
-		container_of(work, struct ata_port, scsi_rescan_task);
+		container_of(work, struct ata_port, scsi_rescan_task.work);
 	struct ata_link *link;
 	struct ata_device *dev;
 	unsigned long flags;
+	bool delay_rescan = false;
 
 	mutex_lock(&ap->scsi_scan_mutex);
 	spin_lock_irqsave(ap->lock, flags);
@@ -4614,6 +4615,21 @@  void ata_scsi_dev_rescan(struct work_struct *work)
 			if (scsi_device_get(sdev))
 				continue;
 
+			/*
+			 * If the rescan work was scheduled because of a resume
+			 * event, the port is already fully resumed, but the
+			 * SCSI device may not yet be fully resumed. In such
+			 * case, executing scsi_rescan_device() may cause a
+			 * deadlock with the PM code on device_lock(). Prevent
+			 * this by giving up and retrying rescan after a short
+			 * delay.
+			 */
+			delay_rescan = sdev->sdev_gendev.power.is_suspended;
+			if (delay_rescan) {
+				scsi_device_put(sdev);
+				break;
+			}
+
 			spin_unlock_irqrestore(ap->lock, flags);
 			scsi_rescan_device(&(sdev->sdev_gendev));
 			scsi_device_put(sdev);
@@ -4623,4 +4639,8 @@  void ata_scsi_dev_rescan(struct work_struct *work)
 
 	spin_unlock_irqrestore(ap->lock, flags);
 	mutex_unlock(&ap->scsi_scan_mutex);
+
+	if (delay_rescan)
+		schedule_delayed_work(&ap->scsi_rescan_task,
+				      msecs_to_jiffies(5));
 }
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 311cd93377c7..dd5797fb6305 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -836,7 +836,7 @@  struct ata_port {
 
 	struct mutex		scsi_scan_mutex;
 	struct delayed_work	hotplug_task;
-	struct work_struct	scsi_rescan_task;
+	struct delayed_work	scsi_rescan_task;
 
 	unsigned int		hsm_task_state;