diff mbox series

[v2,05/12] thermal: core: Fix race between zone registration and system suspend

Message ID 8490245.NyiUUSuA9g@rjwysocki.net (mailing list archive)
State In Next
Delegated to: Rafael Wysocki
Headers show
Series thermal: core: Fixes and cleanups, mostly related to thermal zone init and exit | expand

Commit Message

Rafael J. Wysocki Oct. 4, 2024, 7:19 p.m. UTC
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

If the registration of a thermal zone takes place at the time when
system suspend is started, thermal_pm_notify() can run before the new
thermal zone is added to thermal_tz_list and its "suspended" flag will
not be set.  Consequently, if __thermal_zone_device_update() is called
for that thermal zone, it will not return early as expected, which may
interfere destructively with the system suspend or resume flow.

To avoid that, make thermal_zone_init_complete(), introduced previously,
set the "suspended" flag for new thermal zones if it runs during system
suspend or resume.

Fixes: 4e814173a8c4 ("thermal: core: Fix thermal zone suspend-resume synchronization")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---

This is a new iteration of

https://lore.kernel.org/linux-pm/3335807.44csPzL39Z@rjwysocki.net/

v1 -> v2: Rebase and add a fixes tag.

---
 drivers/thermal/thermal_core.c |   18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

Comments

Lukasz Luba Oct. 21, 2024, 10:27 p.m. UTC | #1
On 10/4/24 20:19, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> 
> If the registration of a thermal zone takes place at the time when
> system suspend is started, thermal_pm_notify() can run before the new
> thermal zone is added to thermal_tz_list and its "suspended" flag will
> not be set.  Consequently, if __thermal_zone_device_update() is called
> for that thermal zone, it will not return early as expected which may
> cause some destructive interference with the system suspend or resume
> flow to occur.
> 
> To avoid that, make thermal_zone_init_complete() introduced previously
> set the "suspended" flag for new thermal zones if it runs during system
> suspend or resume.
> 
> Fixes: 4e814173a8c4 ("thermal: core: Fix thermal zone suspend-resume synchronization")
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> ---
> 
> This is a new iteration of
> 
> https://lore.kernel.org/linux-pm/3335807.44csPzL39Z@rjwysocki.net/
> 
> v1 -> v2: Rebase and add a fixes tag.
> 
> ---
>   drivers/thermal/thermal_core.c |   18 ++++++++++++++++--
>   1 file changed, 16 insertions(+), 2 deletions(-)
> 
> Index: linux-pm/drivers/thermal/thermal_core.c
> ===================================================================
> --- linux-pm.orig/drivers/thermal/thermal_core.c
> +++ linux-pm/drivers/thermal/thermal_core.c
> @@ -40,6 +40,8 @@ static DEFINE_MUTEX(thermal_governor_loc
>   
>   static struct thermal_governor *def_governor;
>   
> +static bool thermal_pm_suspended;
> +
>   /*
>    * Governor section: set of functions to handle thermal governors
>    *
> @@ -1337,6 +1339,14 @@ static void thermal_zone_init_complete(s
>   	mutex_lock(&tz->lock);
>   
>   	tz->state &= ~TZ_STATE_FLAG_INIT;
> +	/*
> +	 * If system suspend or resume is in progress at this point, the
> +	 * new thermal zone needs to be marked as suspended because
> +	 * thermal_pm_notify() has run already.
> +	 */
> +	if (thermal_pm_suspended)
> +		tz->state |= TZ_STATE_FLAG_SUSPENDED;
> +
>   	__thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
>   
>   	mutex_unlock(&tz->lock);
> @@ -1514,10 +1524,10 @@ thermal_zone_device_register_with_trips(
>   	list_for_each_entry(cdev, &thermal_cdev_list, node)
>   		thermal_zone_cdev_bind(tz, cdev);
>   
> -	mutex_unlock(&thermal_list_lock);
> -
>   	thermal_zone_init_complete(tz);
>   
> +	mutex_unlock(&thermal_list_lock);
> +
>   	thermal_notify_tz_create(tz);
>   
>   	thermal_debug_tz_add(tz);
> @@ -1737,6 +1747,8 @@ static int thermal_pm_notify(struct noti
>   	case PM_SUSPEND_PREPARE:
>   		mutex_lock(&thermal_list_lock);
>   
> +		thermal_pm_suspended = true;
> +
>   		list_for_each_entry(tz, &thermal_tz_list, node)
>   			thermal_zone_pm_prepare(tz);
>   
> @@ -1747,6 +1759,8 @@ static int thermal_pm_notify(struct noti
>   	case PM_POST_SUSPEND:
>   		mutex_lock(&thermal_list_lock);
>   
> +		thermal_pm_suspended = false;
> +
>   		list_for_each_entry(tz, &thermal_tz_list, node)
>   			thermal_zone_pm_complete(tz);
>   
> 
> 
> 

Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
diff mbox series

Patch

Index: linux-pm/drivers/thermal/thermal_core.c
===================================================================
--- linux-pm.orig/drivers/thermal/thermal_core.c
+++ linux-pm/drivers/thermal/thermal_core.c
@@ -40,6 +40,8 @@  static DEFINE_MUTEX(thermal_governor_loc
 
 static struct thermal_governor *def_governor;
 
+static bool thermal_pm_suspended;
+
 /*
  * Governor section: set of functions to handle thermal governors
  *
@@ -1337,6 +1339,14 @@  static void thermal_zone_init_complete(s
 	mutex_lock(&tz->lock);
 
 	tz->state &= ~TZ_STATE_FLAG_INIT;
+	/*
+	 * If system suspend or resume is in progress at this point, the
+	 * new thermal zone needs to be marked as suspended because
+	 * thermal_pm_notify() has run already.
+	 */
+	if (thermal_pm_suspended)
+		tz->state |= TZ_STATE_FLAG_SUSPENDED;
+
 	__thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
 
 	mutex_unlock(&tz->lock);
@@ -1514,10 +1524,10 @@  thermal_zone_device_register_with_trips(
 	list_for_each_entry(cdev, &thermal_cdev_list, node)
 		thermal_zone_cdev_bind(tz, cdev);
 
-	mutex_unlock(&thermal_list_lock);
-
 	thermal_zone_init_complete(tz);
 
+	mutex_unlock(&thermal_list_lock);
+
 	thermal_notify_tz_create(tz);
 
 	thermal_debug_tz_add(tz);
@@ -1737,6 +1747,8 @@  static int thermal_pm_notify(struct noti
 	case PM_SUSPEND_PREPARE:
 		mutex_lock(&thermal_list_lock);
 
+		thermal_pm_suspended = true;
+
 		list_for_each_entry(tz, &thermal_tz_list, node)
 			thermal_zone_pm_prepare(tz);
 
@@ -1747,6 +1759,8 @@  static int thermal_pm_notify(struct noti
 	case PM_POST_SUSPEND:
 		mutex_lock(&thermal_list_lock);
 
+		thermal_pm_suspended = false;
+
 		list_for_each_entry(tz, &thermal_tz_list, node)
 			thermal_zone_pm_complete(tz);