diff mbox series

[v8,3/3] thermal: thermal_core: Allow rebooting after critical temp

Message ID 20230916014928.2848737-3-festevam@gmail.com (mailing list archive)
State Superseded, archived
Headers show
Series [v8,1/3] dt-bindings: thermal-zones: Document critical-action | expand

Commit Message

Fabio Estevam Sept. 16, 2023, 1:49 a.m. UTC
From: Fabio Estevam <festevam@denx.de>

Currently, the default mechanism is to trigger a shutdown after the
critical temperature is reached.

In some embedded cases, such behavior does not suit well, as the board may
be unattended in the field and rebooting may be a better approach.

The bootloader may also check the temperature and only allow the boot to
proceed when the temperature is below a certain threshold.

Introduce support for allowing a reboot to be triggered after the
critical temperature is reached.

If the "critical-action" devicetree property is not found, fall back to
the shutdown action to preserve the existing default behavior.

Tested on a i.MX8MM board with the following devicetree changes:

	thermal-zones {
		critical-action = "reboot";
	};

Signed-off-by: Fabio Estevam <festevam@denx.de>
---
Changes since v7:
- Search for the 'critical-action' property in the parent thermal-zone node. (Rafael)

 drivers/thermal/thermal_core.c |  6 +++++-
 drivers/thermal/thermal_of.c   | 16 ++++++++++++++++
 include/linux/thermal.h        |  6 ++++++
 3 files changed, 27 insertions(+), 1 deletion(-)

Comments

Daniel Lezcano Sept. 28, 2023, 1:30 p.m. UTC | #1
On 16/09/2023 03:49, Fabio Estevam wrote:
> From: Fabio Estevam <festevam@denx.de>
> 
> Currently, the default mechanism is to trigger a shutdown after the
> critical temperature is reached.
> 
> In some embedded cases, such behavior does not suit well, as the board may
> be unattended in the field and rebooting may be a better approach.
> 
> The bootloader may also check the temperature and only allow the boot to
> proceed when the temperature is below a certain threshold.
> 
> Introduce support for allowing a reboot to be triggered after the
> critical temperature is reached.
> 
> If the "critical-action" devicetree property is not found, fall back to
> the shutdown action to preserve the existing default behavior.
> 
> Tested on a i.MX8MM board with the following devicetree changes:
> 
> 	thermal-zones {
> 		critical-action = "reboot";
> 	};
> 
> Signed-off-by: Fabio Estevam <festevam@denx.de>
> ---

The changes are too invasive.

The driver can not set their own default critical callback because it 
will be overwritten by the changes. Moreover the changes should stay in 
thermal-of.

Basically in thermal_of_zone_register(... ops):

if (!ops->critical && trip->reboot)
	ops->critical = thermal_of_reboot();

That will result in:

  - if the driver does not specify its own critical ops, then if no 
action is in the trip point, we keep the old way (shutdown), if reboot 
property is set, then assign our own thermal-of-reboot function.



> Changes since v7:
> - Search for the 'critical-action' property in the parent thermal-zone node. (Rafael)
> 
>   drivers/thermal/thermal_core.c |  6 +++++-
>   drivers/thermal/thermal_of.c   | 16 ++++++++++++++++
>   include/linux/thermal.h        |  6 ++++++
>   3 files changed, 27 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index 0bdde1ab5d8b..bfa704516957 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -320,11 +320,15 @@ void thermal_zone_device_critical(struct thermal_zone_device *tz)
>   	 * Its a must for forced_emergency_poweroff_work to be scheduled.
>   	 */
>   	int poweroff_delay_ms = CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS;
> +	static const char *msg = "Temperature too high";
>
>   	dev_emerg(&tz->device, "%s: critical temperature reached, "
>   		  "shutting down\n", tz->type);
>   
> -	hw_protection_shutdown("Temperature too high", poweroff_delay_ms);
> +	if (tz->action == THERMAL_CRITICAL_ACTION_REBOOT)
> +		hw_protection_reboot(msg, poweroff_delay_ms);
> +	else
> +		hw_protection_shutdown(msg, poweroff_delay_ms);
>   }
>   EXPORT_SYMBOL(thermal_zone_device_critical);
>   
> diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
> index 4ca905723429..a1472a1965eb 100644
> --- a/drivers/thermal/thermal_of.c
> +++ b/drivers/thermal/thermal_of.c
> @@ -218,6 +218,20 @@ static struct device_node *of_thermal_zone_find(struct device_node *sensor, int
>   	return tz;
>   }
>   
> +static void thermal_of_get_critical_action(struct device_node *np,
> +					   enum thermal_action *action)
> +{
> +	const char *action_string;
> +	int ret;
> +
> +	ret = of_property_read_string(np->parent, "critical-action", &action_string);
> +	if (ret < 0)
> +		*action = THERMAL_CRITICAL_ACTION_SHUTDOWN;
> +
> +	if (!strcasecmp(action_string, "reboot"))
> +		*action = THERMAL_CRITICAL_ACTION_REBOOT;
> +}
> +
>   static int thermal_of_monitor_init(struct device_node *np, int *delay, int *pdelay)
>   {
>   	int ret;
> @@ -516,6 +530,8 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
>   		goto out_kfree_trips;
>   	}
>   
> +	thermal_of_get_critical_action(np, &tz->action);
> +
>   	ret = thermal_zone_device_enable(tz);
>   	if (ret) {
>   		pr_err("Failed to enabled thermal zone '%s', id=%d: %d\n",
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index eb17495c8acc..8ea761bead79 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -34,6 +34,11 @@ struct thermal_cooling_device;
>   struct thermal_instance;
>   struct thermal_attr;
>   
> +enum thermal_action {
> +	THERMAL_CRITICAL_ACTION_SHUTDOWN = 0, /* shutdown when crit temperature is reached */
> +	THERMAL_CRITICAL_ACTION_REBOOT, /* reboot when crit temperature is reached */
> +};
> +
>   enum thermal_trend {
>   	THERMAL_TREND_STABLE, /* temperature is stable */
>   	THERMAL_TREND_RAISING, /* temperature is raising */
> @@ -183,6 +188,7 @@ struct thermal_zone_device {
>   	struct list_head node;
>   	struct delayed_work poll_queue;
>   	enum thermal_notify_event notify_event;
> +	enum thermal_action action;
>   };
>   
>   /**
diff mbox series

Patch

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 0bdde1ab5d8b..bfa704516957 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -320,11 +320,15 @@  void thermal_zone_device_critical(struct thermal_zone_device *tz)
 	 * Its a must for forced_emergency_poweroff_work to be scheduled.
 	 */
 	int poweroff_delay_ms = CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS;
+	static const char *msg = "Temperature too high";
 
 	dev_emerg(&tz->device, "%s: critical temperature reached, "
 		  "shutting down\n", tz->type);
 
-	hw_protection_shutdown("Temperature too high", poweroff_delay_ms);
+	if (tz->action == THERMAL_CRITICAL_ACTION_REBOOT)
+		hw_protection_reboot(msg, poweroff_delay_ms);
+	else
+		hw_protection_shutdown(msg, poweroff_delay_ms);
 }
 EXPORT_SYMBOL(thermal_zone_device_critical);
 
diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
index 4ca905723429..a1472a1965eb 100644
--- a/drivers/thermal/thermal_of.c
+++ b/drivers/thermal/thermal_of.c
@@ -218,6 +218,20 @@  static struct device_node *of_thermal_zone_find(struct device_node *sensor, int
 	return tz;
 }
 
+static void thermal_of_get_critical_action(struct device_node *np,
+					   enum thermal_action *action)
+{
+	const char *action_string;
+	int ret;
+
+	ret = of_property_read_string(np->parent, "critical-action", &action_string);
+	if (ret < 0)
+		*action = THERMAL_CRITICAL_ACTION_SHUTDOWN;
+
+	if (!strcasecmp(action_string, "reboot"))
+		*action = THERMAL_CRITICAL_ACTION_REBOOT;
+}
+
 static int thermal_of_monitor_init(struct device_node *np, int *delay, int *pdelay)
 {
 	int ret;
@@ -516,6 +530,8 @@  static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
 		goto out_kfree_trips;
 	}
 
+	thermal_of_get_critical_action(np, &tz->action);
+
 	ret = thermal_zone_device_enable(tz);
 	if (ret) {
 		pr_err("Failed to enabled thermal zone '%s', id=%d: %d\n",
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index eb17495c8acc..8ea761bead79 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -34,6 +34,11 @@  struct thermal_cooling_device;
 struct thermal_instance;
 struct thermal_attr;
 
+enum thermal_action {
+	THERMAL_CRITICAL_ACTION_SHUTDOWN = 0, /* shutdown when crit temperature is reached */
+	THERMAL_CRITICAL_ACTION_REBOOT, /* reboot when crit temperature is reached */
+};
+
 enum thermal_trend {
 	THERMAL_TREND_STABLE, /* temperature is stable */
 	THERMAL_TREND_RAISING, /* temperature is raising */
@@ -183,6 +188,7 @@  struct thermal_zone_device {
 	struct list_head node;
 	struct delayed_work poll_queue;
 	enum thermal_notify_event notify_event;
+	enum thermal_action action;
 };
 
 /**