diff mbox series

PM / core: Fix extra pm_runtime_enable on resume

Message ID 1537913455-43397-1-git-send-email-alcooperx@gmail.com (mailing list archive)
State Superseded, archived
Headers show
Series PM / core: Fix extra pm_runtime_enable on resume | expand

Commit Message

Alan Cooper Sept. 25, 2018, 10:10 p.m. UTC
Matching pm_runtime_disable/pm_runtime_enable routines should be
called for "direct_complete" devices during suspend/resume and there
are cases where the pm_runtime_disable is skipped during suspend but
pm_runtime_enable is still called during resume. This is a problem
because the runtime enable state is really a counter and this can
incorrectly enable pm_runtime when it should not be enabled. This
happens for any direct_complete device doing an async suspend after
the global variable "async_error" is set (which is set by any sync
or async device's suspend error or early wake condition).

This failure is very timing dependent but for testing and debug
the following changes will make it happen more frequently.
- Add an msleep(500) as the first line in async_suspend() in
  drivers/base/power/main.c
- Modify alarmtimer_suspend in kernel/time/alarmtimer.c to just
  return -EBUSY

To see the failure condition that this patch fixes, enable dynamic
debug for drivers/base/power/main.c, then run
"rtcwake -s 2 -m standby" and grep for
"skipping runtime enable during resume" messages.

Signed-off-by: Al Cooper <alcooperx@gmail.com>
---
 drivers/base/power/main.c | 21 +++++++++++++++++++--
 include/linux/pm.h        |  1 +
 2 files changed, 20 insertions(+), 2 deletions(-)

Comments

Pavel Machek Sept. 27, 2018, 9:46 p.m. UTC | #1
On Tue 2018-09-25 18:10:55, Al Cooper wrote:
> Matching pm_runtime_disable/pm_runtime_enable routines should be
> called for "direct_complete" devices during suspend/resume and there
> are cases where the pm_runtime_disable is skipped during suspend but
> pm_runtime_enable is still called during resume. This is a problem
> because the runtime enable state is really a counter and this can
> incorrectly enable pm_runtime when it should not be enabled. This
> happens for any direct_complete device doing an async suspend after
> the global variable "async_error" is set (which is set by any sync
> or async device's suspend error or early wake condition).
> 
> This failure is very timing dependent but for testing and debug
> the following changes will make it happen more frequently.
> - Add an msleep(500) as the first line in async_suspend() in
>   drivers/base/power/main.c
> - Modify alarmtimer_suspend in kernel/time/alarmtimer.c to just
>   return -EBUSY
> 
> To see the failure condition that's been fixed with this patch,
> enable dynamic debug for drivers/power/main.c and then run
> "rtcwake -s 2 -m standby" and grep for
> "skipping runtime enable during resume" messages.

Thanks for the patch...

Could / should we add some WARN_ONs to pm_runtime_{disable|enable} to
catch stuff like this?
								Pavel


> Signed-off-by: Al Cooper <alcooperx@gmail.com>
> ---
>  drivers/base/power/main.c | 21 +++++++++++++++++++--
>  include/linux/pm.h        |  1 +
>  2 files changed, 20 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
> index 3f68e2919dc5..2dc40662aae0 100644
> --- a/drivers/base/power/main.c
> +++ b/drivers/base/power/main.c
> @@ -945,7 +945,13 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
>  
>  	if (dev->power.direct_complete) {
>  		/* Match the pm_runtime_disable() in __device_suspend(). */
> -		pm_runtime_enable(dev);
> +		if (dev->power.pm_runtime_disabled) {
> +			pm_runtime_enable(dev);
> +			dev->power.pm_runtime_disabled = false;
> +		} else {
> +			pm_dev_dbg(dev, state,
> +				   "skipping runtime enable during ");
> +		}
>  		goto Complete;
>  	}
>  
> @@ -1736,8 +1742,19 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
>  	if (dev->power.direct_complete) {
>  		if (pm_runtime_status_suspended(dev)) {
>  			pm_runtime_disable(dev);
> -			if (pm_runtime_status_suspended(dev))
> +			if (pm_runtime_status_suspended(dev)) {
> +				/*
> +				 * If any device's sync or async suspend fails
> +				 * and sets async_error, any async suspend for
> +				 * direct_complete devices after the failure
> +				 * will not execute the pm_runtime_disable
> +				 * above. This flag lets the async device's
> +				 * resume function (which is always run) know
> +				 * if a matching pm_runtime_enable is needed.
> +				 */
> +				dev->power.pm_runtime_disabled = true;
>  				goto Complete;
> +			}
>  
>  			pm_runtime_enable(dev);
>  		}
> diff --git a/include/linux/pm.h b/include/linux/pm.h
> index e723b78d8357..45738ad977fd 100644
> --- a/include/linux/pm.h
> +++ b/include/linux/pm.h
> @@ -593,6 +593,7 @@ struct dev_pm_info {
>  	bool			is_late_suspended:1;
>  	bool			early_init:1;	/* Owned by the PM core */
>  	bool			direct_complete:1;	/* Owned by the PM core */
> +	unsigned int		pm_runtime_disabled:1;
>  	u32			driver_flags;
>  	spinlock_t		lock;
>  #ifdef CONFIG_PM_SLEEP

Rafael J. Wysocki Oct. 3, 2018, 8:30 a.m. UTC | #2
On Wednesday, September 26, 2018 12:10:55 AM CEST Al Cooper wrote:
> Matching pm_runtime_disable/pm_runtime_enable routines should be
> called for "direct_complete" devices during suspend/resume and there
> are cases where the pm_runtime_disable is skipped during suspend but
> pm_runtime_enable is still called during resume. This is a problem
> because the runtime enable state is really a counter and this can
> incorrectly enable pm_runtime when it should not be enabled. This
> happens for any direct_complete device doing an async suspend after
> the global variable "async_error" is set (which is set by any sync
> or async device's suspend error or early wake condition).

So the bug is simply that the direct_complete flag is not cleared
when we are going to bail out of __device_suspend() early due to an
error or wakeup.

The patch below should fix it then (without adding extra flags to
struct dev_pm_info), shouldn't it?

---
 drivers/base/power/main.c |    5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

Index: linux-pm/drivers/base/power/main.c
===================================================================
--- linux-pm.orig/drivers/base/power/main.c
+++ linux-pm/drivers/base/power/main.c
@@ -1713,8 +1713,10 @@ static int __device_suspend(struct devic
 
 	dpm_wait_for_subordinate(dev, async);
 
-	if (async_error)
+	if (async_error) {
+		dev->power.direct_complete = false;
 		goto Complete;
+	}
 
 	/*
 	 * If a device configured to wake up the system from sleep states
@@ -1726,6 +1728,7 @@ static int __device_suspend(struct devic
 		pm_wakeup_event(dev, 0);
 
 	if (pm_wakeup_pending()) {
+		dev->power.direct_complete = false;
 		async_error = -EBUSY;
 		goto Complete;
 	}
diff mbox series

Patch

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 3f68e2919dc5..2dc40662aae0 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -945,7 +945,13 @@  static int device_resume(struct device *dev, pm_message_t state, bool async)
 
 	if (dev->power.direct_complete) {
 		/* Match the pm_runtime_disable() in __device_suspend(). */
-		pm_runtime_enable(dev);
+		if (dev->power.pm_runtime_disabled) {
+			pm_runtime_enable(dev);
+			dev->power.pm_runtime_disabled = false;
+		} else {
+			pm_dev_dbg(dev, state,
+				   "skipping runtime enable during ");
+		}
 		goto Complete;
 	}
 
@@ -1736,8 +1742,19 @@  static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 	if (dev->power.direct_complete) {
 		if (pm_runtime_status_suspended(dev)) {
 			pm_runtime_disable(dev);
-			if (pm_runtime_status_suspended(dev))
+			if (pm_runtime_status_suspended(dev)) {
+				/*
+				 * If any device's sync or async suspend fails
+				 * and sets async_error, any async suspend for
+				 * direct_complete devices after the failure
+				 * will not execute the pm_runtime_disable
+				 * above. This flag lets the async device's
+				 * resume function (which is always run) know
+				 * if a matching pm_runtime_enable is needed.
+				 */
+				dev->power.pm_runtime_disabled = true;
 				goto Complete;
+			}
 
 			pm_runtime_enable(dev);
 		}
diff --git a/include/linux/pm.h b/include/linux/pm.h
index e723b78d8357..45738ad977fd 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -593,6 +593,7 @@  struct dev_pm_info {
 	bool			is_late_suspended:1;
 	bool			early_init:1;	/* Owned by the PM core */
 	bool			direct_complete:1;	/* Owned by the PM core */
+	unsigned int		pm_runtime_disabled:1;
 	u32			driver_flags;
 	spinlock_t		lock;
 #ifdef CONFIG_PM_SLEEP