diff mbox series

[RFC,2/2] thermal core: improve thermal zone device mode control

Message ID 1541352911-21343-2-git-send-email-rui.zhang@intel.com (mailing list archive)
State RFC
Delegated to: Zhang Rui
Headers show
Series [RFC,1/2] thermal: remove redundant polling timer update | expand

Commit Message

Zhang Rui Nov. 4, 2018, 5:35 p.m. UTC
Thermal "mode" sysfs attribute is introduced to enable/disable a thermal
zone, and .set_mode() callback is introduced for platform thermal driver
to enable/disable the hardware thermal control logic.

But, thermal core does not handle disabled thermal zone well and this
brings a couple of issues.
1. thermal core polling timer for disabled thermal zone still runs.
2. thermal core still pokes disabled thermal zone occasionally, e.g. upon
   system resume.
3. platform thermal driver can not register a disabled thermal zone.

To fix this, a new flag 'mode' is introduced in struct thermal_zone_device
to represent the thermal zone mode, and several decisions have been made
based on this flag, including
a. check the thermal zone mode right after it's registered.
b. skip updating thermal zone if the thermal zone is disabled.
c. stop the polling timer when the thermal zone is disabled.

TODO:
This patch fixes issue 1 and 2, but for issue 3, for drivers that needs to
register a disabled thermal zone, they should setup tzp->disalbed
explicitly during registration and invokes thermal_zone_set_mode() when
the hardware is ready.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/thermal_core.c  | 39 ++++++++++++++++++++++++++++++++++++++-
 drivers/thermal/thermal_sysfs.c | 27 +++++++--------------------
 include/linux/thermal.h         |  9 +++++++++
 3 files changed, 54 insertions(+), 21 deletions(-)

Comments

Daniel Lezcano Nov. 5, 2018, 2:33 p.m. UTC | #1
On 04/11/2018 18:35, Zhang Rui wrote:
> Thermal "mode" sysfs attribute is introduced to enable/disable a thermal
> zone, and .set_mode() callback is introduced for platform thermal driver
> to enable/disable the hardware thermal control logic.
> 
> But, thermal core does not handle disabled thermal zone well and this
> brings a couple of issues.
> 1. thermal core polling timer for disabled thermal zone still runs.
> 2. thermal core still pokes disabled thermal zone occasionally, e.g. upon
>    system resume.
> 3. platform thermal driver can not register a disabled thermal zone.
> 
> To fix this, a new flag 'mode' is introduced in struct thermal_zone_device
> to represent the thermal zone mode, and several decisions have been made
> based on this flag, including
> a. check the thermal zone mode right after it's registered.
> b. skip updating thermal zone if the thermal zone is disabled.
> c. stop the polling timer when the thermal zone is disabled.
> 
> TODO:
> This patch fixes issue 1 and 2, but for issue 3, for drivers that needs to
> register a disabled thermal zone, they should setup tzp->disalbed
> explicitly during registration and invokes thermal_zone_set_mode() when
> the hardware is ready.

Is there a situation where we want an enabled thermal zone without
association with a driver ?

> Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> ---
>  drivers/thermal/thermal_core.c  | 39 ++++++++++++++++++++++++++++++++++++++-
>  drivers/thermal/thermal_sysfs.c | 27 +++++++--------------------
>  include/linux/thermal.h         |  9 +++++++++
>  3 files changed, 54 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index 98f02b0..65a984a 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -305,7 +305,9 @@ static void monitor_thermal_zone(struct thermal_zone_device *tz)
>  {
>  	mutex_lock(&tz->lock);
>  
> -	if (tz->passive)
> +	if (tz->mode == THERMAL_DEVICE_DISABLED)
> +		thermal_zone_device_set_polling(tz, 0);
> +	else if (tz->passive)
>  		thermal_zone_device_set_polling(tz, tz->passive_delay);
>  	else if (tz->polling_delay)
>  		thermal_zone_device_set_polling(tz, tz->polling_delay);
> @@ -458,11 +460,33 @@ static void thermal_zone_device_reset(struct thermal_zone_device *tz)
>  		pos->initialized = false;
>  }
>  
> +int thermal_zone_set_mode(struct thermal_zone_device *tz,
> +				 enum thermal_device_mode mode)
> +{
> +	int result;
> +
> +	if (tz->mode == mode)
> +		return 0;
> +
> +	if (tz->ops->set_mode) {
> +		result = tz->ops->set_mode(tz, mode);
> +		if (result)
> +			return result;
> +	}
> +
> +	thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(thermal_zone_set_mode);
> +
>  void thermal_zone_device_update(struct thermal_zone_device *tz,
>  				enum thermal_notify_event event)
>  {
>  	int count;
>  
> +	if (tz->mode == THERMAL_DEVICE_DISABLED)
> +		goto update_timer;
> +
>  	if (atomic_read(&in_suspend))
>  		return;
>  
> @@ -477,6 +501,8 @@ void thermal_zone_device_update(struct thermal_zone_device *tz,
>  
>  	for (count = 0; count < tz->trips; count++)
>  		handle_thermal_trip(tz, count);
> +
> +update_timer:
>  	monitor_thermal_zone(tz);
>  }
>  EXPORT_SYMBOL_GPL(thermal_zone_device_update);
> @@ -1275,6 +1301,17 @@ thermal_zone_device_register(const char *type, int trips, int mask,
>  	INIT_DELAYED_WORK(&tz->poll_queue, thermal_zone_device_check);
>  
>  	thermal_zone_device_reset(tz);
> +
> +	/*
> +	 * As .get_mode() is not guaranteed to work at this moment, to register
> +	 * a disabled thermal zone, tzp->disalbed must be set explicitly.
> +	 */
> +	if (tz->tzp) {
> +		tz->mode = tzp->disabled == true ? THERMAL_DEVICE_DISABLED :
> +						   THERMAL_DEVICE_ENABLED;
> +	} else
> +		tz->mode = THERMAL_DEVICE_ENABLED;
> +
>  	/* Update the new thermal zone and mark it as already updated. */
>  	if (atomic_cmpxchg(&tz->need_update, 1, 0))
>  		thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
> diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
> index 2241cea..8d144d4 100644
> --- a/drivers/thermal/thermal_sysfs.c
> +++ b/drivers/thermal/thermal_sysfs.c
> @@ -49,18 +49,9 @@ static ssize_t
>  mode_show(struct device *dev, struct device_attribute *attr, char *buf)
>  {
>  	struct thermal_zone_device *tz = to_thermal_zone(dev);
> -	enum thermal_device_mode mode;
> -	int result;
> -
> -	if (!tz->ops->get_mode)
> -		return -EPERM;
> -
> -	result = tz->ops->get_mode(tz, &mode);
> -	if (result)
> -		return result;
>  
> -	return sprintf(buf, "%s\n", mode == THERMAL_DEVICE_ENABLED ? "enabled"
> -		       : "disabled");
> +	return sprintf(buf, "%s\n", tz->mode == THERMAL_DEVICE_ENABLED ?
> +			"enabled" : "disabled");
>  }
>  
>  static ssize_t
> @@ -68,20 +59,16 @@ mode_store(struct device *dev, struct device_attribute *attr,
>  	   const char *buf, size_t count)
>  {
>  	struct thermal_zone_device *tz = to_thermal_zone(dev);
> -	int result;
> -
> -	if (!tz->ops->set_mode)
> -		return -EPERM;
> +	enum thermal_device_mode mode;
>  
>  	if (!strncmp(buf, "enabled", sizeof("enabled") - 1))
> -		result = tz->ops->set_mode(tz, THERMAL_DEVICE_ENABLED);
> +		mode = THERMAL_DEVICE_ENABLED;
>  	else if (!strncmp(buf, "disabled", sizeof("disabled") - 1))
> -		result = tz->ops->set_mode(tz, THERMAL_DEVICE_DISABLED);
> +		mode = THERMAL_DEVICE_DISABLED;
>  	else
> -		result = -EINVAL;
> +		return -EINVAL;
>  
> -	if (result)
> -		return result;
> +	thermal_zone_set_mode(tz, mode);
>  
>  	return count;
>  }
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index 5f4705f..67e4b5b 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -197,6 +197,7 @@ struct thermal_zone_device {
>  	struct thermal_attr *trip_type_attrs;
>  	struct thermal_attr *trip_hyst_attrs;
>  	void *devdata;
> +	enum thermal_device_mode mode;
>  	int trips;
>  	unsigned long trips_disabled;	/* bitmap for disabled trips */
>  	int passive_delay;
> @@ -287,6 +288,9 @@ struct thermal_zone_params {
>  	 */
>  	bool no_hwmon;
>  
> +	/* a boolean to indicate if the thermal zone is disabled or not */
> +	bool disabled;
> +
>  	int num_tbps;	/* Number of tbp entries */
>  	struct thermal_bind_params *tbp;
>  
> @@ -452,6 +456,8 @@ struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name);
>  int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp);
>  int thermal_zone_get_slope(struct thermal_zone_device *tz);
>  int thermal_zone_get_offset(struct thermal_zone_device *tz);
> +int thermal_zone_set_mode(struct thermal_zone_device *tz,
> +			enum thermal_device_mode mode);
>  
>  int get_tz_trend(struct thermal_zone_device *, int);
>  struct thermal_instance *get_thermal_instance(struct thermal_zone_device *,
> @@ -518,6 +524,9 @@ static inline int thermal_zone_get_slope(
>  static inline int thermal_zone_get_offset(
>  		struct thermal_zone_device *tz)
>  { return -ENODEV; }
> +static inline int thermal_zone_set_mode(struct thermal_zone_device *tz,
> +		enum thermal_device_mode mode);
> +{ return -ENODEV; }
>  static inline int get_tz_trend(struct thermal_zone_device *tz, int trip)
>  { return -ENODEV; }
>  static inline struct thermal_instance *
>
Bartlomiej Zolnierkiewicz Nov. 5, 2018, 3:37 p.m. UTC | #2
On 11/04/2018 06:35 PM, Zhang Rui wrote:
> Thermal "mode" sysfs attribute is introduced to enable/disable a thermal
> zone, and .set_mode() callback is introduced for platform thermal driver
> to enable/disable the hardware thermal control logic.
> 
> But, thermal core does not handle disabled thermal zone well and this
> brings a couple of issues.
> 1. thermal core polling timer for disabled thermal zone still runs.
> 2. thermal core still pokes disabled thermal zone occasionally, e.g. upon
>    system resume.
> 3. platform thermal driver can not register a disabled thermal zone.

What about DT thermal drivers?

> To fix this, a new flag 'mode' is introduced in struct thermal_zone_device
> to represent the thermal zone mode, and several decisions have been made

What about cleaning private 'mode' implementations in da9062-thermal,
d8500_thermal, imx_thermal, int3400_thermal, intel_quark_dts_thermal and
of-thermal?

> based on this flag, including
> a. check the thermal zone mode right after it's registered.
> b. skip updating thermal zone if the thermal zone is disabled.
> c. stop the polling timer when the thermal zone is disabled.
> 
> TODO:
> This patch fixes issue 1 and 2, but for issue 3, for drivers that needs to
> register a disabled thermal zone, they should setup tzp->disalbed

disalbed -> disabled

> explicitly during registration and invokes thermal_zone_set_mode() when
> the hardware is ready.
> 
> Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> ---
>  drivers/thermal/thermal_core.c  | 39 ++++++++++++++++++++++++++++++++++++++-
>  drivers/thermal/thermal_sysfs.c | 27 +++++++--------------------
>  include/linux/thermal.h         |  9 +++++++++
>  3 files changed, 54 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index 98f02b0..65a984a 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -305,7 +305,9 @@ static void monitor_thermal_zone(struct thermal_zone_device *tz)
>  {
>  	mutex_lock(&tz->lock);
>  
> -	if (tz->passive)
> +	if (tz->mode == THERMAL_DEVICE_DISABLED)
> +		thermal_zone_device_set_polling(tz, 0);
> +	else if (tz->passive)
>  		thermal_zone_device_set_polling(tz, tz->passive_delay);
>  	else if (tz->polling_delay)
>  		thermal_zone_device_set_polling(tz, tz->polling_delay);
> @@ -458,11 +460,33 @@ static void thermal_zone_device_reset(struct thermal_zone_device *tz)
>  		pos->initialized = false;
>  }
>  
> +int thermal_zone_set_mode(struct thermal_zone_device *tz,
> +				 enum thermal_device_mode mode)
> +{
> +	int result;
> +
> +	if (tz->mode == mode)
> +		return 0;
> +
> +	if (tz->ops->set_mode) {
> +		result = tz->ops->set_mode(tz, mode);
> +		if (result)
> +			return result;
> +	}
> +
> +	thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);

Keeping ->set_mode call coupled with thermal_zone_device_update()
one is problematic for drivers which use IRQ for reporting crossing
temperature thresholds.

The desired initialization ordering seems to be:

* mark sensor as enabled in the core
* request the driver specific IRQ handler
* enable sensor by the driver specific method
* check the sensor by the core

to fix all initialization ordering issues.

The problem with the "coupled" approach is that you need to update
sensor drivers to implement tz->ops->set_mode method (+in case of DT
drivers you need a way to implement driver specific ->set_mode itself
because they all currently use the generic of_thermal_set_mode()) to
fix all the initialization ordering issues.

In my patchset ->set_mode and thermal_zone_device_update() operations
are separated so you can fix initialization ordering issues now and
worry about implementing ->set_mode later.

> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(thermal_zone_set_mode);
> +
>  void thermal_zone_device_update(struct thermal_zone_device *tz,
>  				enum thermal_notify_event event)
>  {
>  	int count;
>  
> +	if (tz->mode == THERMAL_DEVICE_DISABLED)
> +		goto update_timer;
> +
>  	if (atomic_read(&in_suspend))
>  		return;
>  
> @@ -477,6 +501,8 @@ void thermal_zone_device_update(struct thermal_zone_device *tz,
>  
>  	for (count = 0; count < tz->trips; count++)
>  		handle_thermal_trip(tz, count);
> +
> +update_timer:
>  	monitor_thermal_zone(tz);
>  }
>  EXPORT_SYMBOL_GPL(thermal_zone_device_update);
> @@ -1275,6 +1301,17 @@ thermal_zone_device_register(const char *type, int trips, int mask,
>  	INIT_DELAYED_WORK(&tz->poll_queue, thermal_zone_device_check);
>  
>  	thermal_zone_device_reset(tz);
> +
> +	/*
> +	 * As .get_mode() is not guaranteed to work at this moment, to register
> +	 * a disabled thermal zone, tzp->disalbed must be set explicitly.

disalbed -> disabled

> +	 */
> +	if (tz->tzp) {
> +		tz->mode = tzp->disabled == true ? THERMAL_DEVICE_DISABLED :
> +						   THERMAL_DEVICE_ENABLED;

Your patch only fixes thermal_zone_device_register(), what about
fixing [devm_]thermal_zone_of_sensor_register() which is actually
used by DT thermal drivers?

> +	} else
> +		tz->mode = THERMAL_DEVICE_ENABLED;
> +
>  	/* Update the new thermal zone and mark it as already updated. */
>  	if (atomic_cmpxchg(&tz->need_update, 1, 0))
>  		thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
> diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
> index 2241cea..8d144d4 100644
> --- a/drivers/thermal/thermal_sysfs.c
> +++ b/drivers/thermal/thermal_sysfs.c
> @@ -49,18 +49,9 @@ static ssize_t
>  mode_show(struct device *dev, struct device_attribute *attr, char *buf)
>  {
>  	struct thermal_zone_device *tz = to_thermal_zone(dev);
> -	enum thermal_device_mode mode;
> -	int result;
> -
> -	if (!tz->ops->get_mode)
> -		return -EPERM;
> -
> -	result = tz->ops->get_mode(tz, &mode);
> -	if (result)
> -		return result;
>  
> -	return sprintf(buf, "%s\n", mode == THERMAL_DEVICE_ENABLED ? "enabled"
> -		       : "disabled");
> +	return sprintf(buf, "%s\n", tz->mode == THERMAL_DEVICE_ENABLED ?
> +			"enabled" : "disabled");
>  }
>  
>  static ssize_t
> @@ -68,20 +59,16 @@ mode_store(struct device *dev, struct device_attribute *attr,
>  	   const char *buf, size_t count)
>  {
>  	struct thermal_zone_device *tz = to_thermal_zone(dev);
> -	int result;
> -
> -	if (!tz->ops->set_mode)
> -		return -EPERM;
> +	enum thermal_device_mode mode;
>  
>  	if (!strncmp(buf, "enabled", sizeof("enabled") - 1))
> -		result = tz->ops->set_mode(tz, THERMAL_DEVICE_ENABLED);
> +		mode = THERMAL_DEVICE_ENABLED;
>  	else if (!strncmp(buf, "disabled", sizeof("disabled") - 1))
> -		result = tz->ops->set_mode(tz, THERMAL_DEVICE_DISABLED);
> +		mode = THERMAL_DEVICE_DISABLED;
>  	else
> -		result = -EINVAL;
> +		return -EINVAL;
>  
> -	if (result)
> -		return result;
> +	thermal_zone_set_mode(tz, mode);

Why is thermal_zone_set_mode() return value ignored?

>  	return count;
>  }
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index 5f4705f..67e4b5b 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -197,6 +197,7 @@ struct thermal_zone_device {
>  	struct thermal_attr *trip_type_attrs;
>  	struct thermal_attr *trip_hyst_attrs;
>  	void *devdata;
> +	enum thermal_device_mode mode;
>  	int trips;
>  	unsigned long trips_disabled;	/* bitmap for disabled trips */
>  	int passive_delay;
> @@ -287,6 +288,9 @@ struct thermal_zone_params {
>  	 */
>  	bool no_hwmon;
>  
> +	/* a boolean to indicate if the thermal zone is disabled or not */
> +	bool disabled;
> +
>  	int num_tbps;	/* Number of tbp entries */
>  	struct thermal_bind_params *tbp;
>  
> @@ -452,6 +456,8 @@ struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name);
>  int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp);
>  int thermal_zone_get_slope(struct thermal_zone_device *tz);
>  int thermal_zone_get_offset(struct thermal_zone_device *tz);
> +int thermal_zone_set_mode(struct thermal_zone_device *tz,
> +			enum thermal_device_mode mode);
>  
>  int get_tz_trend(struct thermal_zone_device *, int);
>  struct thermal_instance *get_thermal_instance(struct thermal_zone_device *,
> @@ -518,6 +524,9 @@ static inline int thermal_zone_get_slope(
>  static inline int thermal_zone_get_offset(
>  		struct thermal_zone_device *tz)
>  { return -ENODEV; }
> +static inline int thermal_zone_set_mode(struct thermal_zone_device *tz,
> +		enum thermal_device_mode mode);
> +{ return -ENODEV; }
>  static inline int get_tz_trend(struct thermal_zone_device *tz, int trip)
>  { return -ENODEV; }
>  static inline struct thermal_instance *

Best regards,
--
Bartlomiej Zolnierkiewicz
Samsung R&D Institute Poland
Samsung Electronics
Eduardo Valentin Nov. 6, 2018, 12:43 a.m. UTC | #3
Hey Rui,

Few comments..

On Mon, Nov 05, 2018 at 01:35:11AM +0800, Zhang Rui wrote:
> Thermal "mode" sysfs attribute is introduced to enable/disable a thermal
> zone, and .set_mode() callback is introduced for platform thermal driver
> to enable/disable the hardware thermal control logic.
> 
> But, thermal core does not handle disabled thermal zone well and this
> brings a couple of issues.
> 1. thermal core polling timer for disabled thermal zone still runs.
> 2. thermal core still pokes disabled thermal zone occasionally, e.g. upon
>    system resume.
> 3. platform thermal driver can not register a disabled thermal zone.
> 
> To fix this, a new flag 'mode' is introduced in struct thermal_zone_device
> to represent the thermal zone mode, and several decisions have been made
> based on this flag, including
> a. check the thermal zone mode right after it's registered.
> b. skip updating thermal zone if the thermal zone is disabled.
> c. stop the polling timer when the thermal zone is disabled.
> 
> TODO:
> This patch fixes issue 1 and 2, but for issue 3, for drivers that needs to
> register a disabled thermal zone, they should setup tzp->disalbed
> explicitly during registration and invokes thermal_zone_set_mode() when
> the hardware is ready.
> 
> Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> ---
>  drivers/thermal/thermal_core.c  | 39 ++++++++++++++++++++++++++++++++++++++-
>  drivers/thermal/thermal_sysfs.c | 27 +++++++--------------------
>  include/linux/thermal.h         |  9 +++++++++

IF I understood the problem and the solution, I think 
our patch is a good initial RFC, but it misses a couple of points:
a. Documentation
b. DT (of-thermal) drivers
c. update of drivers on get_temp() (more as follows).

>  3 files changed, 54 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index 98f02b0..65a984a 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -305,7 +305,9 @@ static void monitor_thermal_zone(struct thermal_zone_device *tz)
>  {
>  	mutex_lock(&tz->lock);
>  
> -	if (tz->passive)
> +	if (tz->mode == THERMAL_DEVICE_DISABLED)
> +		thermal_zone_device_set_polling(tz, 0);
> +	else if (tz->passive)
>  		thermal_zone_device_set_polling(tz, tz->passive_delay);
>  	else if (tz->polling_delay)
>  		thermal_zone_device_set_polling(tz, tz->polling_delay);
> @@ -458,11 +460,33 @@ static void thermal_zone_device_reset(struct thermal_zone_device *tz)
>  		pos->initialized = false;
>  }
>  
> +int thermal_zone_set_mode(struct thermal_zone_device *tz,
> +				 enum thermal_device_mode mode)
> +{

looks to me this helper function lacks locking.

Also lacks kernel doc entries.

Also maybe better placed in helpers.c..

> +	int result;
> +
> +	if (tz->mode == mode)
> +		return 0;
> +
> +	if (tz->ops->set_mode) {
> +		result = tz->ops->set_mode(tz, mode);
> +		if (result)
> +			return result;
> +	}
> +
> +	thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);

empty line here..

> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(thermal_zone_set_mode);
> +
>  void thermal_zone_device_update(struct thermal_zone_device *tz,
>  				enum thermal_notify_event event)
>  {
>  	int count;
>  
> +	if (tz->mode == THERMAL_DEVICE_DISABLED)
> +		goto update_timer;


As there is no resource to be cleaned, Why not an early exit:
+	if (tz->mode == THERMAL_DEVICE_DISABLED) {
+		monitor_thermal_zone(tz);
+		return;
+	}

> +
>  	if (atomic_read(&in_suspend))
>  		return;
>  
> @@ -477,6 +501,8 @@ void thermal_zone_device_update(struct thermal_zone_device *tz,
>  
>  	for (count = 0; count < tz->trips; count++)
>  		handle_thermal_trip(tz, count);
> +
> +update_timer:
>  	monitor_thermal_zone(tz);

Also, please check my comment on your patch 1.

>  }
>  EXPORT_SYMBOL_GPL(thermal_zone_device_update);
> @@ -1275,6 +1301,17 @@ thermal_zone_device_register(const char *type, int trips, int mask,
>  	INIT_DELAYED_WORK(&tz->poll_queue, thermal_zone_device_check);
>  
>  	thermal_zone_device_reset(tz);
> +
> +	/*
> +	 * As .get_mode() is not guaranteed to work at this moment, to register
> +	 * a disabled thermal zone, tzp->disalbed must be set explicitly.
> +	 */
> +	if (tz->tzp) {
> +		tz->mode = tzp->disabled == true ? THERMAL_DEVICE_DISABLED :
> +						   THERMAL_DEVICE_ENABLED;
> +	} else
> +		tz->mode = THERMAL_DEVICE_ENABLED;

nip: no need for first curl (or put on both).
> +


>  	/* Update the new thermal zone and mark it as already updated. */
>  	if (atomic_cmpxchg(&tz->need_update, 1, 0))
>  		thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
> diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
> index 2241cea..8d144d4 100644
> --- a/drivers/thermal/thermal_sysfs.c
> +++ b/drivers/thermal/thermal_sysfs.c
> @@ -49,18 +49,9 @@ static ssize_t
>  mode_show(struct device *dev, struct device_attribute *attr, char *buf)
>  {
>  	struct thermal_zone_device *tz = to_thermal_zone(dev);
> -	enum thermal_device_mode mode;
> -	int result;
> -
> -	if (!tz->ops->get_mode)
> -		return -EPERM;
> -
> -	result = tz->ops->get_mode(tz, &mode);
> -	if (result)
> -		return result;
>  
> -	return sprintf(buf, "%s\n", mode == THERMAL_DEVICE_ENABLED ? "enabled"
> -		       : "disabled");

This part of this patch confuses me. Why are we getting rid of
.get_mode()? If that is the case, we should:
- remove it from ops
- remove from all drivers that provide it.

Assuming your idea was to leave mode control into thermal core only.
(also see that reading mode from userland is only allowed if
.get_mode() is provided in the ops).


> +	return sprintf(buf, "%s\n", tz->mode == THERMAL_DEVICE_ENABLED ?
> +			"enabled" : "disabled");
>  }
>  
>  static ssize_t
> @@ -68,20 +59,16 @@ mode_store(struct device *dev, struct device_attribute *attr,
>  	   const char *buf, size_t count)
>  {
>  	struct thermal_zone_device *tz = to_thermal_zone(dev);
> -	int result;
> -
> -	if (!tz->ops->set_mode)
> -		return -EPERM;

Ok.. 

> +	enum thermal_device_mode mode;
>  
>  	if (!strncmp(buf, "enabled", sizeof("enabled") - 1))
> -		result = tz->ops->set_mode(tz, THERMAL_DEVICE_ENABLED);
> +		mode = THERMAL_DEVICE_ENABLED;
>  	else if (!strncmp(buf, "disabled", sizeof("disabled") - 1))
> -		result = tz->ops->set_mode(tz, THERMAL_DEVICE_DISABLED);
> +		mode = THERMAL_DEVICE_DISABLED;
>  	else
> -		result = -EINVAL;
> +		return -EINVAL;
>  
> -	if (result)
> -		return result;
> +	thermal_zone_set_mode(tz, mode);
>  

error code here..

>  	return count;
>  }
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index 5f4705f..67e4b5b 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -197,6 +197,7 @@ struct thermal_zone_device {
>  	struct thermal_attr *trip_type_attrs;
>  	struct thermal_attr *trip_hyst_attrs;
>  	void *devdata;
> +	enum thermal_device_mode mode;
>  	int trips;
>  	unsigned long trips_disabled;	/* bitmap for disabled trips */
>  	int passive_delay;
> @@ -287,6 +288,9 @@ struct thermal_zone_params {
>  	 */
>  	bool no_hwmon;
>  
> +	/* a boolean to indicate if the thermal zone is disabled or not */
> +	bool disabled;
> +
>  	int num_tbps;	/* Number of tbp entries */
>  	struct thermal_bind_params *tbp;
>  
> @@ -452,6 +456,8 @@ struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name);
>  int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp);
>  int thermal_zone_get_slope(struct thermal_zone_device *tz);
>  int thermal_zone_get_offset(struct thermal_zone_device *tz);
> +int thermal_zone_set_mode(struct thermal_zone_device *tz,
> +			enum thermal_device_mode mode);
>  
>  int get_tz_trend(struct thermal_zone_device *, int);
>  struct thermal_instance *get_thermal_instance(struct thermal_zone_device *,
> @@ -518,6 +524,9 @@ static inline int thermal_zone_get_slope(
>  static inline int thermal_zone_get_offset(
>  		struct thermal_zone_device *tz)
>  { return -ENODEV; }
> +static inline int thermal_zone_set_mode(struct thermal_zone_device *tz,
> +		enum thermal_device_mode mode);
> +{ return -ENODEV; }
>  static inline int get_tz_trend(struct thermal_zone_device *tz, int trip)
>  { return -ENODEV; }
>  static inline struct thermal_instance *
> -- 
> 2.7.4
>
Zhang Rui Nov. 6, 2018, 3:48 a.m. UTC | #4
On 一, 2018-11-05 at 15:33 +0100, Daniel Lezcano wrote:
> On 04/11/2018 18:35, Zhang Rui wrote:
> > 
> > Thermal "mode" sysfs attribute is introduced to enable/disable a
> > thermal
> > zone, and .set_mode() callback is introduced for platform thermal
> > driver
> > to enable/disable the hardware thermal control logic.
> > 
> > But, thermal core does not handle disabled thermal zone well and
> > this
> > brings a couple of issues.
> > 1. thermal core polling timer for disabled thermal zone still runs.
> > 2. thermal core still pokes disabled thermal zone occasionally,
> > e.g. upon
> >    system resume.
> > 3. platform thermal driver can not register a disabled thermal
> > zone.
> > 
> > To fix this, a new flag 'mode' is introduced in struct
> > thermal_zone_device
> > to represent the thermal zone mode, and several decisions have been
> > made
> > based on this flag, including
> > a. check the thermal zone mode right after it's registered.
> > b. skip updating thermal zone if the thermal zone is disabled.
> > c. stop the polling timer when the thermal zone is disabled.
> > 
> > TODO:
> > This patch fixes issue 1 and 2, but for issue 3, for drivers that
> > needs to
> > register a disabled thermal zone, they should setup tzp->disalbed
> > explicitly during registration and invokes thermal_zone_set_mode()
> > when
> > the hardware is ready.
> Is there a situation where we want an enabled thermal zone without
> association with a driver ?

I'm not sure I understand the question correctly.
Driver decides when to register the thermal zones, and driver knows if
the thermal zones registered are ready for use or not during
registration. This is also association, right?

thanks,
rui

> 
> > 
> > Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> > ---
> >  drivers/thermal/thermal_core.c  | 39
> > ++++++++++++++++++++++++++++++++++++++-
> >  drivers/thermal/thermal_sysfs.c | 27 +++++++--------------------
> >  include/linux/thermal.h         |  9 +++++++++
> >  3 files changed, 54 insertions(+), 21 deletions(-)
> > 
> > diff --git a/drivers/thermal/thermal_core.c
> > b/drivers/thermal/thermal_core.c
> > index 98f02b0..65a984a 100644
> > --- a/drivers/thermal/thermal_core.c
> > +++ b/drivers/thermal/thermal_core.c
> > @@ -305,7 +305,9 @@ static void monitor_thermal_zone(struct
> > thermal_zone_device *tz)
> >  {
> >  	mutex_lock(&tz->lock);
> >  
> > -	if (tz->passive)
> > +	if (tz->mode == THERMAL_DEVICE_DISABLED)
> > +		thermal_zone_device_set_polling(tz, 0);
> > +	else if (tz->passive)
> >  		thermal_zone_device_set_polling(tz, tz-
> > >passive_delay);
> >  	else if (tz->polling_delay)
> >  		thermal_zone_device_set_polling(tz, tz-
> > >polling_delay);
> > @@ -458,11 +460,33 @@ static void thermal_zone_device_reset(struct
> > thermal_zone_device *tz)
> >  		pos->initialized = false;
> >  }
> >  
> > +int thermal_zone_set_mode(struct thermal_zone_device *tz,
> > +				 enum thermal_device_mode mode)
> > +{
> > +	int result;
> > +
> > +	if (tz->mode == mode)
> > +		return 0;
> > +
> > +	if (tz->ops->set_mode) {
> > +		result = tz->ops->set_mode(tz, mode);
> > +		if (result)
> > +			return result;
> > +	}
> > +
> > +	thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
> > +	return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(thermal_zone_set_mode);
> > +
> >  void thermal_zone_device_update(struct thermal_zone_device *tz,
> >  				enum thermal_notify_event event)
> >  {
> >  	int count;
> >  
> > +	if (tz->mode == THERMAL_DEVICE_DISABLED)
> > +		goto update_timer;
> > +
> >  	if (atomic_read(&in_suspend))
> >  		return;
> >  
> > @@ -477,6 +501,8 @@ void thermal_zone_device_update(struct
> > thermal_zone_device *tz,
> >  
> >  	for (count = 0; count < tz->trips; count++)
> >  		handle_thermal_trip(tz, count);
> > +
> > +update_timer:
> >  	monitor_thermal_zone(tz);
> >  }
> >  EXPORT_SYMBOL_GPL(thermal_zone_device_update);
> > @@ -1275,6 +1301,17 @@ thermal_zone_device_register(const char
> > *type, int trips, int mask,
> >  	INIT_DELAYED_WORK(&tz->poll_queue,
> > thermal_zone_device_check);
> >  
> >  	thermal_zone_device_reset(tz);
> > +
> > +	/*
> > +	 * As .get_mode() is not guaranteed to work at this
> > moment, to register
> > +	 * a disabled thermal zone, tzp->disalbed must be set
> > explicitly.
> > +	 */
> > +	if (tz->tzp) {
> > +		tz->mode = tzp->disabled == true ?
> > THERMAL_DEVICE_DISABLED :
> > +						   THERMAL_DEVICE_
> > ENABLED;
> > +	} else
> > +		tz->mode = THERMAL_DEVICE_ENABLED;
> > +
> >  	/* Update the new thermal zone and mark it as already
> > updated. */
> >  	if (atomic_cmpxchg(&tz->need_update, 1, 0))
> >  		thermal_zone_device_update(tz,
> > THERMAL_EVENT_UNSPECIFIED);
> > diff --git a/drivers/thermal/thermal_sysfs.c
> > b/drivers/thermal/thermal_sysfs.c
> > index 2241cea..8d144d4 100644
> > --- a/drivers/thermal/thermal_sysfs.c
> > +++ b/drivers/thermal/thermal_sysfs.c
> > @@ -49,18 +49,9 @@ static ssize_t
> >  mode_show(struct device *dev, struct device_attribute *attr, char
> > *buf)
> >  {
> >  	struct thermal_zone_device *tz = to_thermal_zone(dev);
> > -	enum thermal_device_mode mode;
> > -	int result;
> > -
> > -	if (!tz->ops->get_mode)
> > -		return -EPERM;
> > -
> > -	result = tz->ops->get_mode(tz, &mode);
> > -	if (result)
> > -		return result;
> >  
> > -	return sprintf(buf, "%s\n", mode == THERMAL_DEVICE_ENABLED
> > ? "enabled"
> > -		       : "disabled");
> > +	return sprintf(buf, "%s\n", tz->mode ==
> > THERMAL_DEVICE_ENABLED ?
> > +			"enabled" : "disabled");
> >  }
> >  
> >  static ssize_t
> > @@ -68,20 +59,16 @@ mode_store(struct device *dev, struct
> > device_attribute *attr,
> >  	   const char *buf, size_t count)
> >  {
> >  	struct thermal_zone_device *tz = to_thermal_zone(dev);
> > -	int result;
> > -
> > -	if (!tz->ops->set_mode)
> > -		return -EPERM;
> > +	enum thermal_device_mode mode;
> >  
> >  	if (!strncmp(buf, "enabled", sizeof("enabled") - 1))
> > -		result = tz->ops->set_mode(tz,
> > THERMAL_DEVICE_ENABLED);
> > +		mode = THERMAL_DEVICE_ENABLED;
> >  	else if (!strncmp(buf, "disabled", sizeof("disabled") -
> > 1))
> > -		result = tz->ops->set_mode(tz,
> > THERMAL_DEVICE_DISABLED);
> > +		mode = THERMAL_DEVICE_DISABLED;
> >  	else
> > -		result = -EINVAL;
> > +		return -EINVAL;
> >  
> > -	if (result)
> > -		return result;
> > +	thermal_zone_set_mode(tz, mode);
> >  
> >  	return count;
> >  }
> > diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> > index 5f4705f..67e4b5b 100644
> > --- a/include/linux/thermal.h
> > +++ b/include/linux/thermal.h
> > @@ -197,6 +197,7 @@ struct thermal_zone_device {
> >  	struct thermal_attr *trip_type_attrs;
> >  	struct thermal_attr *trip_hyst_attrs;
> >  	void *devdata;
> > +	enum thermal_device_mode mode;
> >  	int trips;
> >  	unsigned long trips_disabled;	/* bitmap for
> > disabled trips */
> >  	int passive_delay;
> > @@ -287,6 +288,9 @@ struct thermal_zone_params {
> >  	 */
> >  	bool no_hwmon;
> >  
> > +	/* a boolean to indicate if the thermal zone is disabled
> > or not */
> > +	bool disabled;
> > +
> >  	int num_tbps;	/* Number of tbp entries */
> >  	struct thermal_bind_params *tbp;
> >  
> > @@ -452,6 +456,8 @@ struct thermal_zone_device
> > *thermal_zone_get_zone_by_name(const char *name);
> >  int thermal_zone_get_temp(struct thermal_zone_device *tz, int
> > *temp);
> >  int thermal_zone_get_slope(struct thermal_zone_device *tz);
> >  int thermal_zone_get_offset(struct thermal_zone_device *tz);
> > +int thermal_zone_set_mode(struct thermal_zone_device *tz,
> > +			enum thermal_device_mode mode);
> >  
> >  int get_tz_trend(struct thermal_zone_device *, int);
> >  struct thermal_instance *get_thermal_instance(struct
> > thermal_zone_device *,
> > @@ -518,6 +524,9 @@ static inline int thermal_zone_get_slope(
> >  static inline int thermal_zone_get_offset(
> >  		struct thermal_zone_device *tz)
> >  { return -ENODEV; }
> > +static inline int thermal_zone_set_mode(struct thermal_zone_device
> > *tz,
> > +		enum thermal_device_mode mode);
> > +{ return -ENODEV; }
> >  static inline int get_tz_trend(struct thermal_zone_device *tz, int
> > trip)
> >  { return -ENODEV; }
> >  static inline struct thermal_instance *
> > 
>
Zhang Rui Nov. 6, 2018, 7:13 a.m. UTC | #5
On 一, 2018-11-05 at 16:37 +0100, Bartlomiej Zolnierkiewicz wrote:
> On 11/04/2018 06:35 PM, Zhang Rui wrote:
> > 
> > Thermal "mode" sysfs attribute is introduced to enable/disable a
> > thermal
> > zone, and .set_mode() callback is introduced for platform thermal
> > driver
> > to enable/disable the hardware thermal control logic.
> > 
> > But, thermal core does not handle disabled thermal zone well and
> > this
> > brings a couple of issues.
> > 1. thermal core polling timer for disabled thermal zone still runs.
> > 2. thermal core still pokes disabled thermal zone occasionally,
> > e.g. upon
> >    system resume.
> > 3. platform thermal driver can not register a disabled thermal
> > zone.
> What about DT thermal drivers?
> 
yes, I think DT thermal drivers should also be covered.
The idea is to provide a mechanism so that any platform driver can
register a disabled thermal zone.

> > 
> > To fix this, a new flag 'mode' is introduced in struct
> > thermal_zone_device
> > to represent the thermal zone mode, and several decisions have been
> > made
> What about cleaning private 'mode' implementations in da9062-thermal,
> d8500_thermal, imx_thermal, int3400_thermal, intel_quark_dts_thermal
> and
> of-thermal?

yes, if we can leverage one flag, aka, the one in thermal_zone_device
struct, then there is no reason to have private ones.

> > 
> > based on this flag, including
> > a. check the thermal zone mode right after it's registered.
> > b. skip updating thermal zone if the thermal zone is disabled.
> > c. stop the polling timer when the thermal zone is disabled.
> > 
> > TODO:
> > This patch fixes issue 1 and 2, but for issue 3, for drivers that
> > needs to
> > register a disabled thermal zone, they should setup tzp->disalbed
> disalbed -> disabled
> 
> > 
> > explicitly during registration and invokes thermal_zone_set_mode()
> > when
> > the hardware is ready.
> > 
> > Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> > ---
> >  drivers/thermal/thermal_core.c  | 39
> > ++++++++++++++++++++++++++++++++++++++-
> >  drivers/thermal/thermal_sysfs.c | 27 +++++++--------------------
> >  include/linux/thermal.h         |  9 +++++++++
> >  3 files changed, 54 insertions(+), 21 deletions(-)
> > 
> > diff --git a/drivers/thermal/thermal_core.c
> > b/drivers/thermal/thermal_core.c
> > index 98f02b0..65a984a 100644
> > --- a/drivers/thermal/thermal_core.c
> > +++ b/drivers/thermal/thermal_core.c
> > @@ -305,7 +305,9 @@ static void monitor_thermal_zone(struct
> > thermal_zone_device *tz)
> >  {
> >  	mutex_lock(&tz->lock);
> >  
> > -	if (tz->passive)
> > +	if (tz->mode == THERMAL_DEVICE_DISABLED)
> > +		thermal_zone_device_set_polling(tz, 0);
> > +	else if (tz->passive)
> >  		thermal_zone_device_set_polling(tz, tz-
> > >passive_delay);
> >  	else if (tz->polling_delay)
> >  		thermal_zone_device_set_polling(tz, tz-
> > >polling_delay);
> > @@ -458,11 +460,33 @@ static void thermal_zone_device_reset(struct
> > thermal_zone_device *tz)
> >  		pos->initialized = false;
> >  }
> >  
> > +int thermal_zone_set_mode(struct thermal_zone_device *tz,
> > +				 enum thermal_device_mode mode)
> > +{
> > +	int result;
> > +
> > +	if (tz->mode == mode)
> > +		return 0;
> > +
> > +	if (tz->ops->set_mode) {
> > +		result = tz->ops->set_mode(tz, mode);
> > +		if (result)
> > +			return result;
> > +	}
> > +
> > +	thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
> Keeping ->set_mode call coupled with thermal_zone_device_update()
> one is problematic for drivers which use IRQ for reporting crossing
> temperature thresholds.
> 
> The desired initialization ordering seems to be:
> 
> * mark sensor as enabled in the core
> * request the driver specific IRQ handler
> * enable sensor by the driver specific method
> * check the sensor by the core
> 
> to fix all initialization ordering issues.

I'm confused.
I think you mean mark sensor as disabled in the first step?

> 
> The problem with the "coupled" approach is that you need to update
> sensor drivers to implement tz->ops->set_mode method (+in case of DT
> drivers you need a way to implement driver specific ->set_mode itself
> because they all currently use the generic of_thermal_set_mode()) to
> fix all the initialization ordering issues.
> 
> In my patchset ->set_mode and thermal_zone_device_update() operations
> are separated so you can fix initialization ordering issues now and
> worry about implementing ->set_mode later.

right.
I was trying to do the same thing in this patch set, so the order would
be

* mark sensor as disabled during registration (so that
thermal_zone_device_update() won't touch the sensor)
* request the driver specific IRQ handler
* enable sensor by the driver specific method
* call thermal_zone_set_mode() to tell thermal core to check the sensor

> 
> > 
> > +	return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(thermal_zone_set_mode);
> > +
> >  void thermal_zone_device_update(struct thermal_zone_device *tz,
> >  				enum thermal_notify_event event)
> >  {
> >  	int count;
> >  
> > +	if (tz->mode == THERMAL_DEVICE_DISABLED)
> > +		goto update_timer;
> > +
> >  	if (atomic_read(&in_suspend))
> >  		return;
> >  
> > @@ -477,6 +501,8 @@ void thermal_zone_device_update(struct
> > thermal_zone_device *tz,
> >  
> >  	for (count = 0; count < tz->trips; count++)
> >  		handle_thermal_trip(tz, count);
> > +
> > +update_timer:
> >  	monitor_thermal_zone(tz);
> >  }
> >  EXPORT_SYMBOL_GPL(thermal_zone_device_update);
> > @@ -1275,6 +1301,17 @@ thermal_zone_device_register(const char
> > *type, int trips, int mask,
> >  	INIT_DELAYED_WORK(&tz->poll_queue,
> > thermal_zone_device_check);
> >  
> >  	thermal_zone_device_reset(tz);
> > +
> > +	/*
> > +	 * As .get_mode() is not guaranteed to work at this
> > moment, to register
> > +	 * a disabled thermal zone, tzp->disalbed must be set
> > explicitly.
> disalbed -> disabled
> 
> > 
> > +	 */
> > +	if (tz->tzp) {
> > +		tz->mode = tzp->disabled == true ?
> > THERMAL_DEVICE_DISABLED :
> > +						   THERMAL_DEVICE_
> > ENABLED;
> Your patch only fixes thermal_zone_device_register(), what about
> fixing [devm_]thermal_zone_of_sensor_register() which is actually
> used by DT thermal drivers?

that's next step.
This is a draft patch to address my proposal for the issues.
And the basic idea is to separate thermal_zone_device registration and
thermal_zone_device_update() so that drivers can invoke
thermal_zone_device_update() (via thermal_zone_set_mode()) only after
it's ready.

And the difference between your proposal and this one is that, I'd like
to see if we can solve all the problems by implementing the "mode"
control in one place, aka, thermal core, instead of of_thermal and any 
platform thermal drivers.

thanks,
rui
> 
> > 
> > +	} else
> > +		tz->mode = THERMAL_DEVICE_ENABLED;
> > +
> >  	/* Update the new thermal zone and mark it as already
> > updated. */
> >  	if (atomic_cmpxchg(&tz->need_update, 1, 0))
> >  		thermal_zone_device_update(tz,
> > THERMAL_EVENT_UNSPECIFIED);
> > diff --git a/drivers/thermal/thermal_sysfs.c
> > b/drivers/thermal/thermal_sysfs.c
> > index 2241cea..8d144d4 100644
> > --- a/drivers/thermal/thermal_sysfs.c
> > +++ b/drivers/thermal/thermal_sysfs.c
> > @@ -49,18 +49,9 @@ static ssize_t
> >  mode_show(struct device *dev, struct device_attribute *attr, char
> > *buf)
> >  {
> >  	struct thermal_zone_device *tz = to_thermal_zone(dev);
> > -	enum thermal_device_mode mode;
> > -	int result;
> > -
> > -	if (!tz->ops->get_mode)
> > -		return -EPERM;
> > -
> > -	result = tz->ops->get_mode(tz, &mode);
> > -	if (result)
> > -		return result;
> >  
> > -	return sprintf(buf, "%s\n", mode == THERMAL_DEVICE_ENABLED
> > ? "enabled"
> > -		       : "disabled");
> > +	return sprintf(buf, "%s\n", tz->mode ==
> > THERMAL_DEVICE_ENABLED ?
> > +			"enabled" : "disabled");
> >  }
> >  
> >  static ssize_t
> > @@ -68,20 +59,16 @@ mode_store(struct device *dev, struct
> > device_attribute *attr,
> >  	   const char *buf, size_t count)
> >  {
> >  	struct thermal_zone_device *tz = to_thermal_zone(dev);
> > -	int result;
> > -
> > -	if (!tz->ops->set_mode)
> > -		return -EPERM;
> > +	enum thermal_device_mode mode;
> >  
> >  	if (!strncmp(buf, "enabled", sizeof("enabled") - 1))
> > -		result = tz->ops->set_mode(tz,
> > THERMAL_DEVICE_ENABLED);
> > +		mode = THERMAL_DEVICE_ENABLED;
> >  	else if (!strncmp(buf, "disabled", sizeof("disabled") -
> > 1))
> > -		result = tz->ops->set_mode(tz,
> > THERMAL_DEVICE_DISABLED);
> > +		mode = THERMAL_DEVICE_DISABLED;
> >  	else
> > -		result = -EINVAL;
> > +		return -EINVAL;
> >  
> > -	if (result)
> > -		return result;
> > +	thermal_zone_set_mode(tz, mode);
> Why is thermal_zone_set_mode() return value ignored?
> 
> > 
> >  	return count;
> >  }
> > diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> > index 5f4705f..67e4b5b 100644
> > --- a/include/linux/thermal.h
> > +++ b/include/linux/thermal.h
> > @@ -197,6 +197,7 @@ struct thermal_zone_device {
> >  	struct thermal_attr *trip_type_attrs;
> >  	struct thermal_attr *trip_hyst_attrs;
> >  	void *devdata;
> > +	enum thermal_device_mode mode;
> >  	int trips;
> >  	unsigned long trips_disabled;	/* bitmap for
> > disabled trips */
> >  	int passive_delay;
> > @@ -287,6 +288,9 @@ struct thermal_zone_params {
> >  	 */
> >  	bool no_hwmon;
> >  
> > +	/* a boolean to indicate if the thermal zone is disabled
> > or not */
> > +	bool disabled;
> > +
> >  	int num_tbps;	/* Number of tbp entries */
> >  	struct thermal_bind_params *tbp;
> >  
> > @@ -452,6 +456,8 @@ struct thermal_zone_device
> > *thermal_zone_get_zone_by_name(const char *name);
> >  int thermal_zone_get_temp(struct thermal_zone_device *tz, int
> > *temp);
> >  int thermal_zone_get_slope(struct thermal_zone_device *tz);
> >  int thermal_zone_get_offset(struct thermal_zone_device *tz);
> > +int thermal_zone_set_mode(struct thermal_zone_device *tz,
> > +			enum thermal_device_mode mode);
> >  
> >  int get_tz_trend(struct thermal_zone_device *, int);
> >  struct thermal_instance *get_thermal_instance(struct
> > thermal_zone_device *,
> > @@ -518,6 +524,9 @@ static inline int thermal_zone_get_slope(
> >  static inline int thermal_zone_get_offset(
> >  		struct thermal_zone_device *tz)
> >  { return -ENODEV; }
> > +static inline int thermal_zone_set_mode(struct thermal_zone_device
> > *tz,
> > +		enum thermal_device_mode mode);
> > +{ return -ENODEV; }
> >  static inline int get_tz_trend(struct thermal_zone_device *tz, int
> > trip)
> >  { return -ENODEV; }
> >  static inline struct thermal_instance *
> Best regards,
> --
> Bartlomiej Zolnierkiewicz
> Samsung R&D Institute Poland
> Samsung Electronics
Zhang Rui Nov. 6, 2018, 7:54 a.m. UTC | #6
Hi, Eduardo,

thanks for review.

On 一, 2018-11-05 at 16:43 -0800, Eduardo Valentin wrote:
> Hey Rui,
> 
> Few comments..
> 
> On Mon, Nov 05, 2018 at 01:35:11AM +0800, Zhang Rui wrote:
> > 
> > Thermal "mode" sysfs attribute is introduced to enable/disable a
> > thermal
> > zone, and .set_mode() callback is introduced for platform thermal
> > driver
> > to enable/disable the hardware thermal control logic.
> > 
> > But, thermal core does not handle disabled thermal zone well and
> > this
> > brings a couple of issues.
> > 1. thermal core polling timer for disabled thermal zone still runs.
> > 2. thermal core still pokes disabled thermal zone occasionally,
> > e.g. upon
> >    system resume.
> > 3. platform thermal driver can not register a disabled thermal
> > zone.
> > 
> > To fix this, a new flag 'mode' is introduced in struct
> > thermal_zone_device
> > to represent the thermal zone mode, and several decisions have been
> > made
> > based on this flag, including
> > a. check the thermal zone mode right after it's registered.
> > b. skip updating thermal zone if the thermal zone is disabled.
> > c. stop the polling timer when the thermal zone is disabled.
> > 
> > TODO:
> > This patch fixes issue 1 and 2, but for issue 3, for drivers that
> > needs to
> > register a disabled thermal zone, they should setup tzp->disalbed
> > explicitly during registration and invokes thermal_zone_set_mode()
> > when
> > the hardware is ready.
> > 
> > Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> > ---
> >  drivers/thermal/thermal_core.c  | 39
> > ++++++++++++++++++++++++++++++++++++++-
> >  drivers/thermal/thermal_sysfs.c | 27 +++++++--------------------
> >  include/linux/thermal.h         |  9 +++++++++
> IF I understood the problem and the solution, I think 
> our patch is a good initial RFC, but it misses a couple of points:

right. This is a draft proposal rather than a patch, and it is not
targeting for upstream. And I want to use it as reference for the
discussion in LPC.

> a. Documentation
> b. DT (of-thermal) drivers
> c. update of drivers on get_temp() (more as follows).
> 
Good point.
And my slides also proposed userspace tool to check "mode" sysfs I/F
before poking the others, because we have seen bug reports that user
complaining about error messages upon poking "temp" sysfs attribute.
But you're right, we should return error immediately for some of the
sysfs I/F access for disabled thermal zones.

> > 
> >  3 files changed, 54 insertions(+), 21 deletions(-)
> > 
> > diff --git a/drivers/thermal/thermal_core.c
> > b/drivers/thermal/thermal_core.c
> > index 98f02b0..65a984a 100644
> > --- a/drivers/thermal/thermal_core.c
> > +++ b/drivers/thermal/thermal_core.c
> > @@ -305,7 +305,9 @@ static void monitor_thermal_zone(struct
> > thermal_zone_device *tz)
> >  {
> >  	mutex_lock(&tz->lock);
> >  
> > -	if (tz->passive)
> > +	if (tz->mode == THERMAL_DEVICE_DISABLED)
> > +		thermal_zone_device_set_polling(tz, 0);
> > +	else if (tz->passive)
> >  		thermal_zone_device_set_polling(tz, tz-
> > >passive_delay);
> >  	else if (tz->polling_delay)
> >  		thermal_zone_device_set_polling(tz, tz-
> > >polling_delay);
> > @@ -458,11 +460,33 @@ static void thermal_zone_device_reset(struct
> > thermal_zone_device *tz)
> >  		pos->initialized = false;
> >  }
> >  
> > +int thermal_zone_set_mode(struct thermal_zone_device *tz,
> > +				 enum thermal_device_mode mode)
> > +{
> looks to me this helper function lacks locking.
> 
> Also lacks kernel doc entries.
> 
> Also maybe better placed in helpers.c..
> 
makes sense.
> > 
> > +	int result;
> > +
> > +	if (tz->mode == mode)
> > +		return 0;
> > +
> > +	if (tz->ops->set_mode) {
> > +		result = tz->ops->set_mode(tz, mode);
> > +		if (result)
> > +			return result;
> > +	}
> > +
> > +	thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
> empty line here..
> 
> > 
> > +	return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(thermal_zone_set_mode);
> > +
> >  void thermal_zone_device_update(struct thermal_zone_device *tz,
> >  				enum thermal_notify_event event)
> >  {
> >  	int count;
> >  
> > +	if (tz->mode == THERMAL_DEVICE_DISABLED)
> > +		goto update_timer;
> 
> As there is no resource to be cleaned, Why not an early exit:
> +	if (tz->mode == THERMAL_DEVICE_DISABLED) {
> +		monitor_thermal_zone(tz);
> +		return;
> +	}
> 
> > 
> > +
> >  	if (atomic_read(&in_suspend))
> >  		return;
> >  
> > @@ -477,6 +501,8 @@ void thermal_zone_device_update(struct
> > thermal_zone_device *tz,
> >  
> >  	for (count = 0; count < tz->trips; count++)
> >  		handle_thermal_trip(tz, count);
> > +
> > +update_timer:
> >  	monitor_thermal_zone(tz);
> Also, please check my comment on your patch 1.
> 
> > 
> >  }
> >  EXPORT_SYMBOL_GPL(thermal_zone_device_update);
> > @@ -1275,6 +1301,17 @@ thermal_zone_device_register(const char
> > *type, int trips, int mask,
> >  	INIT_DELAYED_WORK(&tz->poll_queue,
> > thermal_zone_device_check);
> >  
> >  	thermal_zone_device_reset(tz);
> > +
> > +	/*
> > +	 * As .get_mode() is not guaranteed to work at this
> > moment, to register
> > +	 * a disabled thermal zone, tzp->disalbed must be set
> > explicitly.
> > +	 */
> > +	if (tz->tzp) {
> > +		tz->mode = tzp->disabled == true ?
> > THERMAL_DEVICE_DISABLED :
> > +						   THERMAL_DEVICE_
> > ENABLED;
> > +	} else
> > +		tz->mode = THERMAL_DEVICE_ENABLED;
> nip: no need for first curl (or put on both).
> > 
> > +
> 
> > 
> >  	/* Update the new thermal zone and mark it as already
> > updated. */
> >  	if (atomic_cmpxchg(&tz->need_update, 1, 0))
> >  		thermal_zone_device_update(tz,
> > THERMAL_EVENT_UNSPECIFIED);
> > diff --git a/drivers/thermal/thermal_sysfs.c
> > b/drivers/thermal/thermal_sysfs.c
> > index 2241cea..8d144d4 100644
> > --- a/drivers/thermal/thermal_sysfs.c
> > +++ b/drivers/thermal/thermal_sysfs.c
> > @@ -49,18 +49,9 @@ static ssize_t
> >  mode_show(struct device *dev, struct device_attribute *attr, char
> > *buf)
> >  {
> >  	struct thermal_zone_device *tz = to_thermal_zone(dev);
> > -	enum thermal_device_mode mode;
> > -	int result;
> > -
> > -	if (!tz->ops->get_mode)
> > -		return -EPERM;
> > -
> > -	result = tz->ops->get_mode(tz, &mode);
> > -	if (result)
> > -		return result;
> >  
> > -	return sprintf(buf, "%s\n", mode == THERMAL_DEVICE_ENABLED
> > ? "enabled"
> > -		       : "disabled");
> This part of this patch confuses me. Why are we getting rid of
> .get_mode()? If that is the case, we should:
> - remove it from ops
> - remove from all drivers that provide it.
> 
> Assuming your idea was to leave mode control into thermal core only.

exactly.
Originally, I proposed to use ->get_mode() to get mode during thermal
zone device registration. But as I'm not sure if ->get_mode() is always
valid during registration, tzp->disable flag is used in this version.

Removing get_mode() sounds reasonable to me.

> (also see that reading mode from userland is only allowed if
> .get_mode() is provided in the ops).
> 
right, we can make "mode" mandatory then.

thanks,
rui
> 
> > 
> > +	return sprintf(buf, "%s\n", tz->mode ==
> > THERMAL_DEVICE_ENABLED ?
> > +			"enabled" : "disabled");
> >  }
> >  
> >  static ssize_t
> > @@ -68,20 +59,16 @@ mode_store(struct device *dev, struct
> > device_attribute *attr,
> >  	   const char *buf, size_t count)
> >  {
> >  	struct thermal_zone_device *tz = to_thermal_zone(dev);
> > -	int result;
> > -
> > -	if (!tz->ops->set_mode)
> > -		return -EPERM;
> Ok.. 
> 
> > 
> > +	enum thermal_device_mode mode;
> >  
> >  	if (!strncmp(buf, "enabled", sizeof("enabled") - 1))
> > -		result = tz->ops->set_mode(tz,
> > THERMAL_DEVICE_ENABLED);
> > +		mode = THERMAL_DEVICE_ENABLED;
> >  	else if (!strncmp(buf, "disabled", sizeof("disabled") -
> > 1))
> > -		result = tz->ops->set_mode(tz,
> > THERMAL_DEVICE_DISABLED);
> > +		mode = THERMAL_DEVICE_DISABLED;
> >  	else
> > -		result = -EINVAL;
> > +		return -EINVAL;
> >  
> > -	if (result)
> > -		return result;
> > +	thermal_zone_set_mode(tz, mode);
> >  
> error code here..
> 
> > 
> >  	return count;
> >  }
> > diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> > index 5f4705f..67e4b5b 100644
> > --- a/include/linux/thermal.h
> > +++ b/include/linux/thermal.h
> > @@ -197,6 +197,7 @@ struct thermal_zone_device {
> >  	struct thermal_attr *trip_type_attrs;
> >  	struct thermal_attr *trip_hyst_attrs;
> >  	void *devdata;
> > +	enum thermal_device_mode mode;
> >  	int trips;
> >  	unsigned long trips_disabled;	/* bitmap for
> > disabled trips */
> >  	int passive_delay;
> > @@ -287,6 +288,9 @@ struct thermal_zone_params {
> >  	 */
> >  	bool no_hwmon;
> >  
> > +	/* a boolean to indicate if the thermal zone is disabled
> > or not */
> > +	bool disabled;
> > +
> >  	int num_tbps;	/* Number of tbp entries */
> >  	struct thermal_bind_params *tbp;
> >  
> > @@ -452,6 +456,8 @@ struct thermal_zone_device
> > *thermal_zone_get_zone_by_name(const char *name);
> >  int thermal_zone_get_temp(struct thermal_zone_device *tz, int
> > *temp);
> >  int thermal_zone_get_slope(struct thermal_zone_device *tz);
> >  int thermal_zone_get_offset(struct thermal_zone_device *tz);
> > +int thermal_zone_set_mode(struct thermal_zone_device *tz,
> > +			enum thermal_device_mode mode);
> >  
> >  int get_tz_trend(struct thermal_zone_device *, int);
> >  struct thermal_instance *get_thermal_instance(struct
> > thermal_zone_device *,
> > @@ -518,6 +524,9 @@ static inline int thermal_zone_get_slope(
> >  static inline int thermal_zone_get_offset(
> >  		struct thermal_zone_device *tz)
> >  { return -ENODEV; }
> > +static inline int thermal_zone_set_mode(struct thermal_zone_device
> > *tz,
> > +		enum thermal_device_mode mode);
> > +{ return -ENODEV; }
> >  static inline int get_tz_trend(struct thermal_zone_device *tz, int
> > trip)
> >  { return -ENODEV; }
> >  static inline struct thermal_instance *
Bartlomiej Zolnierkiewicz Nov. 6, 2018, 4:01 p.m. UTC | #7
On 11/06/2018 08:13 AM, Zhang Rui wrote:
> On 一, 2018-11-05 at 16:37 +0100, Bartlomiej Zolnierkiewicz wrote:
>> On 11/04/2018 06:35 PM, Zhang Rui wrote:
>>>
>>> Thermal "mode" sysfs attribute is introduced to enable/disable a
>>> thermal
>>> zone, and .set_mode() callback is introduced for platform thermal
>>> driver
>>> to enable/disable the hardware thermal control logic.
>>>
>>> But, thermal core does not handle disabled thermal zone well and
>>> this
>>> brings a couple of issues.
>>> 1. thermal core polling timer for disabled thermal zone still runs.
>>> 2. thermal core still pokes disabled thermal zone occasionally,
>>> e.g. upon
>>>    system resume.
>>> 3. platform thermal driver can not register a disabled thermal
>>> zone.
>> What about DT thermal drivers?
>>
> yes, I think DT thermal drivers should also be covered.
> The idea is to provide a mechanism so that any platform driver can
> register a disabled thermal zone.
> 
>>>
>>> To fix this, a new flag 'mode' is introduced in struct
>>> thermal_zone_device
>>> to represent the thermal zone mode, and several decisions have been
>>> made
>> What about cleaning private 'mode' implementations in da9062-thermal,
>> d8500_thermal, imx_thermal, int3400_thermal, intel_quark_dts_thermal
>> and
>> of-thermal?
> 
> yes, if we can leverage one flag, aka, the one in thermal_zone_device
> struct, then there is no reason to have private ones.
> 
>>>
>>> based on this flag, including
>>> a. check the thermal zone mode right after it's registered.
>>> b. skip updating thermal zone if the thermal zone is disabled.
>>> c. stop the polling timer when the thermal zone is disabled.
>>>
>>> TODO:
>>> This patch fixes issue 1 and 2, but for issue 3, for drivers that
>>> needs to
>>> register a disabled thermal zone, they should setup tzp->disalbed
>> disalbed -> disabled
>>
>>>
>>> explicitly during registration and invokes thermal_zone_set_mode()
>>> when
>>> the hardware is ready.
>>>
>>> Signed-off-by: Zhang Rui <rui.zhang@intel.com>
>>> ---
>>>  drivers/thermal/thermal_core.c  | 39
>>> ++++++++++++++++++++++++++++++++++++++-
>>>  drivers/thermal/thermal_sysfs.c | 27 +++++++--------------------
>>>  include/linux/thermal.h         |  9 +++++++++
>>>  3 files changed, 54 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/drivers/thermal/thermal_core.c
>>> b/drivers/thermal/thermal_core.c
>>> index 98f02b0..65a984a 100644
>>> --- a/drivers/thermal/thermal_core.c
>>> +++ b/drivers/thermal/thermal_core.c
>>> @@ -305,7 +305,9 @@ static void monitor_thermal_zone(struct
>>> thermal_zone_device *tz)
>>>  {
>>>  	mutex_lock(&tz->lock);
>>>  
>>> -	if (tz->passive)
>>> +	if (tz->mode == THERMAL_DEVICE_DISABLED)
>>> +		thermal_zone_device_set_polling(tz, 0);
>>> +	else if (tz->passive)
>>>  		thermal_zone_device_set_polling(tz, tz-
>>>> passive_delay);
>>>  	else if (tz->polling_delay)
>>>  		thermal_zone_device_set_polling(tz, tz-
>>>> polling_delay);
>>> @@ -458,11 +460,33 @@ static void thermal_zone_device_reset(struct
>>> thermal_zone_device *tz)
>>>  		pos->initialized = false;
>>>  }
>>>  
>>> +int thermal_zone_set_mode(struct thermal_zone_device *tz,
>>> +				 enum thermal_device_mode mode)
>>> +{
>>> +	int result;
>>> +
>>> +	if (tz->mode == mode)
>>> +		return 0;
>>> +
>>> +	if (tz->ops->set_mode) {
>>> +		result = tz->ops->set_mode(tz, mode);
>>> +		if (result)
>>> +			return result;
>>> +	}
>>> +
>>> +	thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
>> Keeping ->set_mode call coupled with thermal_zone_device_update()
>> one is problematic for drivers which use IRQ for reporting crossing
>> temperature thresholds.
>>
>> The desired initialization ordering seems to be:
>>
>> * mark sensor as enabled in the core
>> * request the driver specific IRQ handler
>> * enable sensor by the driver specific method
>> * check the sensor by the core
>>
>> to fix all initialization ordering issues.
> 
> I'm confused.
> I think you mean mark sensor as disabled in the first step?

Well, yes.. and no.. ;) Please see below.

>>
>> The problem with the "coupled" approach is that you need to update
>> sensor drivers to implement tz->ops->set_mode method (+in case of DT
>> drivers you need a way to implement driver specific ->set_mode itself
>> because they all currently use the generic of_thermal_set_mode()) to
>> fix all the initialization ordering issues.
>>
>> In my patchset ->set_mode and thermal_zone_device_update() operations
>> are separated so you can fix initialization ordering issues now and
>> worry about implementing ->set_mode later.
> 
> right.
> I was trying to do the same thing in this patch set, so the order would
> be
> 
> * mark sensor as disabled during registration (so that
> thermal_zone_device_update() won't touch the sensor)
> * request the driver specific IRQ handler
> * enable sensor by the driver specific method
> * call thermal_zone_set_mode() to tell thermal core to check the sensor

In case of DT drivers ->set_mode in vanilla kernel translates to
of_thermal_set_mode():

static int of_thermal_set_mode(struct thermal_zone_device *tz,
			       enum thermal_device_mode mode)
{
	struct __thermal_zone *data = tz->devdata;

	mutex_lock(&tz->lock);

	if (mode == THERMAL_DEVICE_ENABLED)
		tz->polling_delay = data->polling_delay;
	else
		tz->polling_delay = 0;

	mutex_unlock(&tz->lock);

	data->mode = mode;
	thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);

	return 0;
}

while the driver specific enable method is done without any
awareness of the thermal core, i.e.:

	thermal->chip->control(thermal->regs, true);

in rockchip_thermal.c, which just operates on sensor registers.

Please note that after above ->control() IRQs can arrive
immediately (which is unlikely but not impossible) and IRQ
handler can call thermal_zone_device_update().

Now please see what happens with "coupled" thermal_zone_set_mode()
in case of DT drivers:

* mark sensor as disabled during registration (so that
  thermal_zone_device_update() won't touch the sensor)
* request the driver specific IRQ handler
* thermal_zone_set_mode(enable)
-> call to thermal_zone_device_update() with sensor marked as
   enabled in the core but disabled in the hardware
* thermal->chip->control()

or

* mark sensor as disabled during registration (so that
  thermal_zone_device_update() won't touch the sensor)
* request the driver specific IRQ handler
* thermal->chip->control()
-> IRQ can happen immediately and result in call to
   thermal_zone_device_update() with sensor marked as
   disabled in the core but enabled in the hardware,
   this results in the thermal event not being handled
* thermal_zone_set_mode(enable)

With my patches what happens is:

* mark sensor as disabled during registration
  (by current code in thermal_of_build_thermal_zone())
* thermal_zone_set_mode(enable)
-> thermal_zone_device_update() not called here
* request the driver specific IRQ handler
* thermal->chip->control()
->IRQ can happen here and will be processed just fine
* thermal_zone_device_update()

Of course we can decide that we don't care about IRQ
arriving during initialization and go with "coupled"
thermal_zone_set_mode() but the current mainline code
seems to work for such unlikely cases (even if by
accident) so we should be careful with changing it..

> 
>>
>>>
>>> +	return 0;
>>> +}
>>> +EXPORT_SYMBOL_GPL(thermal_zone_set_mode);
>>> +
>>>  void thermal_zone_device_update(struct thermal_zone_device *tz,
>>>  				enum thermal_notify_event event)
>>>  {
>>>  	int count;
>>>  
>>> +	if (tz->mode == THERMAL_DEVICE_DISABLED)
>>> +		goto update_timer;
>>> +
>>>  	if (atomic_read(&in_suspend))
>>>  		return;
>>>  
>>> @@ -477,6 +501,8 @@ void thermal_zone_device_update(struct
>>> thermal_zone_device *tz,
>>>  
>>>  	for (count = 0; count < tz->trips; count++)
>>>  		handle_thermal_trip(tz, count);
>>> +
>>> +update_timer:
>>>  	monitor_thermal_zone(tz);
>>>  }
>>>  EXPORT_SYMBOL_GPL(thermal_zone_device_update);
>>> @@ -1275,6 +1301,17 @@ thermal_zone_device_register(const char
>>> *type, int trips, int mask,
>>>  	INIT_DELAYED_WORK(&tz->poll_queue,
>>> thermal_zone_device_check);
>>>  
>>>  	thermal_zone_device_reset(tz);
>>> +
>>> +	/*
>>> +	 * As .get_mode() is not guaranteed to work at this
>>> moment, to register
>>> +	 * a disabled thermal zone, tzp->disalbed must be set
>>> explicitly.
>> disalbed -> disabled
>>
>>>
>>> +	 */
>>> +	if (tz->tzp) {
>>> +		tz->mode = tzp->disabled == true ?
>>> THERMAL_DEVICE_DISABLED :
>>> +						   THERMAL_DEVICE_
>>> ENABLED;
>> Your patch only fixes thermal_zone_device_register(), what about
>> fixing [devm_]thermal_zone_of_sensor_register() which is actually
>> used by DT thermal drivers?
> 
> that's next step.
> This is a draft patch to address my proposal for the issues.
> And the basic idea is to separate thermal_zone_device registration and
> thermal_zone_device_update() so that drivers can invoke
> thermal_zone_device_update() (via thermal_zone_set_mode()) only after
> it's ready.
> 
> And the difference between your proposal and this one is that, I'd like
> to see if we can solve all the problems by implementing the "mode"
> control in one place, aka, thermal core, instead of of_thermal and any 
> platform thermal drivers.

Could please explain more what do you mean by that?

In my patches there is just one place for controlling 'mode'
which is thermal_zone_set_mode() introduced in patch #1:

int thermal_zone_set_mode(struct thermal_zone_device *tz,
                         enum thermal_device_mode mode)
{
       return tz->ops->set_mode(tz, mode);
}

and it can be easily later evolved to be more complex one
(i.e. one in your patch).

Moreover patch #4 moves 'mode' control out from of-thermal.c
(present in vanilla kernel) to respective DT drivers.

Also my patchset has no functionality changes for platform
drivers.. 

> thanks,
> rui
>>
>>>
>>> +	} else
>>> +		tz->mode = THERMAL_DEVICE_ENABLED;
>>> +
>>>  	/* Update the new thermal zone and mark it as already
>>> updated. */
>>>  	if (atomic_cmpxchg(&tz->need_update, 1, 0))
>>>  		thermal_zone_device_update(tz,
>>> THERMAL_EVENT_UNSPECIFIED);
>>> diff --git a/drivers/thermal/thermal_sysfs.c
>>> b/drivers/thermal/thermal_sysfs.c
>>> index 2241cea..8d144d4 100644
>>> --- a/drivers/thermal/thermal_sysfs.c
>>> +++ b/drivers/thermal/thermal_sysfs.c
>>> @@ -49,18 +49,9 @@ static ssize_t
>>>  mode_show(struct device *dev, struct device_attribute *attr, char
>>> *buf)
>>>  {
>>>  	struct thermal_zone_device *tz = to_thermal_zone(dev);
>>> -	enum thermal_device_mode mode;
>>> -	int result;
>>> -
>>> -	if (!tz->ops->get_mode)
>>> -		return -EPERM;
>>> -
>>> -	result = tz->ops->get_mode(tz, &mode);
>>> -	if (result)
>>> -		return result;
>>>  
>>> -	return sprintf(buf, "%s\n", mode == THERMAL_DEVICE_ENABLED
>>> ? "enabled"
>>> -		       : "disabled");
>>> +	return sprintf(buf, "%s\n", tz->mode ==
>>> THERMAL_DEVICE_ENABLED ?
>>> +			"enabled" : "disabled");
>>>  }
>>>  
>>>  static ssize_t
>>> @@ -68,20 +59,16 @@ mode_store(struct device *dev, struct
>>> device_attribute *attr,
>>>  	   const char *buf, size_t count)
>>>  {
>>>  	struct thermal_zone_device *tz = to_thermal_zone(dev);
>>> -	int result;
>>> -
>>> -	if (!tz->ops->set_mode)
>>> -		return -EPERM;
>>> +	enum thermal_device_mode mode;
>>>  
>>>  	if (!strncmp(buf, "enabled", sizeof("enabled") - 1))
>>> -		result = tz->ops->set_mode(tz,
>>> THERMAL_DEVICE_ENABLED);
>>> +		mode = THERMAL_DEVICE_ENABLED;
>>>  	else if (!strncmp(buf, "disabled", sizeof("disabled") -
>>> 1))
>>> -		result = tz->ops->set_mode(tz,
>>> THERMAL_DEVICE_DISABLED);
>>> +		mode = THERMAL_DEVICE_DISABLED;
>>>  	else
>>> -		result = -EINVAL;
>>> +		return -EINVAL;
>>>  
>>> -	if (result)
>>> -		return result;
>>> +	thermal_zone_set_mode(tz, mode);
>> Why is thermal_zone_set_mode() return value ignored?
>>
>>>
>>>  	return count;
>>>  }
>>> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
>>> index 5f4705f..67e4b5b 100644
>>> --- a/include/linux/thermal.h
>>> +++ b/include/linux/thermal.h
>>> @@ -197,6 +197,7 @@ struct thermal_zone_device {
>>>  	struct thermal_attr *trip_type_attrs;
>>>  	struct thermal_attr *trip_hyst_attrs;
>>>  	void *devdata;
>>> +	enum thermal_device_mode mode;
>>>  	int trips;
>>>  	unsigned long trips_disabled;	/* bitmap for
>>> disabled trips */
>>>  	int passive_delay;
>>> @@ -287,6 +288,9 @@ struct thermal_zone_params {
>>>  	 */
>>>  	bool no_hwmon;
>>>  
>>> +	/* a boolean to indicate if the thermal zone is disabled
>>> or not */
>>> +	bool disabled;
>>> +
>>>  	int num_tbps;	/* Number of tbp entries */
>>>  	struct thermal_bind_params *tbp;
>>>  
>>> @@ -452,6 +456,8 @@ struct thermal_zone_device
>>> *thermal_zone_get_zone_by_name(const char *name);
>>>  int thermal_zone_get_temp(struct thermal_zone_device *tz, int
>>> *temp);
>>>  int thermal_zone_get_slope(struct thermal_zone_device *tz);
>>>  int thermal_zone_get_offset(struct thermal_zone_device *tz);
>>> +int thermal_zone_set_mode(struct thermal_zone_device *tz,
>>> +			enum thermal_device_mode mode);
>>>  
>>>  int get_tz_trend(struct thermal_zone_device *, int);
>>>  struct thermal_instance *get_thermal_instance(struct
>>> thermal_zone_device *,
>>> @@ -518,6 +524,9 @@ static inline int thermal_zone_get_slope(
>>>  static inline int thermal_zone_get_offset(
>>>  		struct thermal_zone_device *tz)
>>>  { return -ENODEV; }
>>> +static inline int thermal_zone_set_mode(struct thermal_zone_device
>>> *tz,
>>> +		enum thermal_device_mode mode);
>>> +{ return -ENODEV; }
>>>  static inline int get_tz_trend(struct thermal_zone_device *tz, int
>>> trip)
>>>  { return -ENODEV; }
>>>  static inline struct thermal_instance *

Best regards,
--
Bartlomiej Zolnierkiewicz
Samsung R&D Institute Poland
Samsung Electronics
Zhang Rui Nov. 7, 2018, 5:55 p.m. UTC | #8
On 二, 2018-11-06 at 17:01 +0100, Bartlomiej Zolnierkiewicz wrote:
> On 11/06/2018 08:13 AM, Zhang Rui wrote:
> > 
> > On 一, 2018-11-05 at 16:37 +0100, Bartlomiej Zolnierkiewicz wrote:
> > > 
> > > On 11/04/2018 06:35 PM, Zhang Rui wrote:
> > > > 
> > > > 
> > > > Thermal "mode" sysfs attribute is introduced to enable/disable
> > > > a
> > > > thermal
> > > > zone, and .set_mode() callback is introduced for platform
> > > > thermal
> > > > driver
> > > > to enable/disable the hardware thermal control logic.
> > > > 
> > > > But, thermal core does not handle disabled thermal zone well
> > > > and
> > > > this
> > > > brings a couple of issues.
> > > > 1. thermal core polling timer for disabled thermal zone still
> > > > runs.
> > > > 2. thermal core still pokes disabled thermal zone occasionally,
> > > > e.g. upon
> > > >    system resume.
> > > > 3. platform thermal driver can not register a disabled thermal
> > > > zone.
> > > What about DT thermal drivers?
> > > 
> > yes, I think DT thermal drivers should also be covered.
> > The idea is to provide a mechanism so that any platform driver can
> > register a disabled thermal zone.
> > 
> > > 
> > > > 
> > > > 
> > > > To fix this, a new flag 'mode' is introduced in struct
> > > > thermal_zone_device
> > > > to represent the thermal zone mode, and several decisions have
> > > > been
> > > > made
> > > What about cleaning private 'mode' implementations in da9062-
> > > thermal,
> > > d8500_thermal, imx_thermal, int3400_thermal,
> > > intel_quark_dts_thermal
> > > and
> > > of-thermal?
> > yes, if we can leverage one flag, aka, the one in
> > thermal_zone_device
> > struct, then there is no reason to have private ones.
> > 
> > > 
> > > > 
> > > > 
> > > > based on this flag, including
> > > > a. check the thermal zone mode right after it's registered.
> > > > b. skip updating thermal zone if the thermal zone is disabled.
> > > > c. stop the polling timer when the thermal zone is disabled.
> > > > 
> > > > TODO:
> > > > This patch fixes issue 1 and 2, but for issue 3, for drivers
> > > > that
> > > > needs to
> > > > register a disabled thermal zone, they should setup tzp-
> > > > >disalbed
> > > disalbed -> disabled
> > > 
> > > > 
> > > > 
> > > > explicitly during registration and invokes
> > > > thermal_zone_set_mode()
> > > > when
> > > > the hardware is ready.
> > > > 
> > > > Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> > > > ---
> > > >  drivers/thermal/thermal_core.c  | 39
> > > > ++++++++++++++++++++++++++++++++++++++-
> > > >  drivers/thermal/thermal_sysfs.c | 27 +++++++----------------
> > > > ----
> > > >  include/linux/thermal.h         |  9 +++++++++
> > > >  3 files changed, 54 insertions(+), 21 deletions(-)
> > > > 
> > > > diff --git a/drivers/thermal/thermal_core.c
> > > > b/drivers/thermal/thermal_core.c
> > > > index 98f02b0..65a984a 100644
> > > > --- a/drivers/thermal/thermal_core.c
> > > > +++ b/drivers/thermal/thermal_core.c
> > > > @@ -305,7 +305,9 @@ static void monitor_thermal_zone(struct
> > > > thermal_zone_device *tz)
> > > >  {
> > > >  	mutex_lock(&tz->lock);
> > > >  
> > > > -	if (tz->passive)
> > > > +	if (tz->mode == THERMAL_DEVICE_DISABLED)
> > > > +		thermal_zone_device_set_polling(tz, 0);
> > > > +	else if (tz->passive)
> > > >  		thermal_zone_device_set_polling(tz, tz-
> > > > > 
> > > > > passive_delay);
> > > >  	else if (tz->polling_delay)
> > > >  		thermal_zone_device_set_polling(tz, tz-
> > > > > 
> > > > > polling_delay);
> > > > @@ -458,11 +460,33 @@ static void
> > > > thermal_zone_device_reset(struct
> > > > thermal_zone_device *tz)
> > > >  		pos->initialized = false;
> > > >  }
> > > >  
> > > > +int thermal_zone_set_mode(struct thermal_zone_device *tz,
> > > > +				 enum thermal_device_mode
> > > > mode)
> > > > +{
> > > > +	int result;
> > > > +
> > > > +	if (tz->mode == mode)
> > > > +		return 0;
> > > > +
> > > > +	if (tz->ops->set_mode) {
> > > > +		result = tz->ops->set_mode(tz, mode);
> > > > +		if (result)
> > > > +			return result;
> > > > +	}
> > > > +
> > > > +	thermal_zone_device_update(tz,
> > > > THERMAL_EVENT_UNSPECIFIED);
> > > Keeping ->set_mode call coupled with thermal_zone_device_update()
> > > one is problematic for drivers which use IRQ for reporting
> > > crossing
> > > temperature thresholds.
> > > 
> > > The desired initialization ordering seems to be:
> > > 
> > > * mark sensor as enabled in the core
> > > * request the driver specific IRQ handler
> > > * enable sensor by the driver specific method
> > > * check the sensor by the core
> > > 
> > > to fix all initialization ordering issues.
> > I'm confused.
> > I think you mean mark sensor as disabled in the first step?
> Well, yes.. and no.. ;) Please see below.
> 
> > 
> > > 
> > > 
> > > The problem with the "coupled" approach is that you need to
> > > update
> > > sensor drivers to implement tz->ops->set_mode method (+in case of
> > > DT
> > > drivers you need a way to implement driver specific ->set_mode
> > > itself
> > > because they all currently use the generic of_thermal_set_mode())
> > > to
> > > fix all the initialization ordering issues.
> > > 
> > > In my patchset ->set_mode and thermal_zone_device_update()
> > > operations
> > > are separated so you can fix initialization ordering issues now
> > > and
> > > worry about implementing ->set_mode later.
> > right.
> > I was trying to do the same thing in this patch set, so the order
> > would
> > be
> > 
> > * mark sensor as disabled during registration (so that
> > thermal_zone_device_update() won't touch the sensor)
> > * request the driver specific IRQ handler
> > * enable sensor by the driver specific method
> > * call thermal_zone_set_mode() to tell thermal core to check the
> > sensor
> In case of DT drivers ->set_mode in vanilla kernel translates to
> of_thermal_set_mode():
> 
> static int of_thermal_set_mode(struct thermal_zone_device *tz,
> 			       enum thermal_device_mode mode)
> {
> 	struct __thermal_zone *data = tz->devdata;
> 
> 	mutex_lock(&tz->lock);
> 
> 	if (mode == THERMAL_DEVICE_ENABLED)
> 		tz->polling_delay = data->polling_delay;
> 	else
> 		tz->polling_delay = 0;
> 
> 	mutex_unlock(&tz->lock);
> 
> 	data->mode = mode;
> 	thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
> 
> 	return 0;
> }
> 
> while the driver specific enable method is done without any
> awareness of the thermal core, i.e.:
> 
> 	thermal->chip->control(thermal->regs, true);
> 
> in rockchip_thermal.c, which just operates on sensor registers.
> 
> Please note that after above ->control() IRQs can arrive
> immediately (which is unlikely but not impossible) and IRQ
> handler can call thermal_zone_device_update().
> 
> Now please see what happens with "coupled" thermal_zone_set_mode()
> in case of DT drivers:
> 
> * mark sensor as disabled during registration (so that
>   thermal_zone_device_update() won't touch the sensor)
> * request the driver specific IRQ handler
> * thermal_zone_set_mode(enable)
> -> call to thermal_zone_device_update() with sensor marked as
>    enabled in the core but disabled in the hardware
> * thermal->chip->control()
> 
> or
> 
> * mark sensor as disabled during registration (so that
>   thermal_zone_device_update() won't touch the sensor)
> * request the driver specific IRQ handler
> * thermal->chip->control()
> -> IRQ can happen immediately and result in call to
>    thermal_zone_device_update() with sensor marked as
>    disabled in the core but enabled in the hardware,
>    this results in the thermal event not being handled
> * thermal_zone_set_mode(enable)
> With my patches what happens is:
> 
> * mark sensor as disabled during registration
>   (by current code in thermal_of_build_thermal_zone())

thanks to your detailed explanation of the problem.

The only question to me is that, in this phase,
a. sensor is disabled from of-thermal' point of view, but thermal core
doesn't handle "mode" properly, thermal_zone_device_update() still
pokes the sensor.
b. thermal_of_build_thermal_zone() invokes
thermal_zone_device_register(), which in turn invokes
thermal_zone_device_update().
This does not cause real issue probably because data->ops->get_temp is
not set at this point, but this still sounds like a problem to me,
especially for non-DT thermal drivers.

Anyway, I will apply your patch set first, and bring the other
questions to plumber.

thanks,
rui

> * thermal_zone_set_mode(enable)
> -> thermal_zone_device_update() not called here
> * request the driver specific IRQ handler
> * thermal->chip->control()
> ->IRQ can happen here and will be processed just fine
> * thermal_zone_device_update()
> 
> Of course we can decide that we don't care about IRQ
> arriving during initialization and go with "coupled"
> thermal_zone_set_mode() but the current mainline code
> seems to work for such unlikely cases (even if by
> accident) so we should be careful with changing it..
> 
> > 
> > 
> > > 
> > > 
> > > > 
> > > > 
> > > > +	return 0;
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(thermal_zone_set_mode);
> > > > +
> > > >  void thermal_zone_device_update(struct thermal_zone_device
> > > > *tz,
> > > >  				enum thermal_notify_event
> > > > event)
> > > >  {
> > > >  	int count;
> > > >  
> > > > +	if (tz->mode == THERMAL_DEVICE_DISABLED)
> > > > +		goto update_timer;
> > > > +
> > > >  	if (atomic_read(&in_suspend))
> > > >  		return;
> > > >  
> > > > @@ -477,6 +501,8 @@ void thermal_zone_device_update(struct
> > > > thermal_zone_device *tz,
> > > >  
> > > >  	for (count = 0; count < tz->trips; count++)
> > > >  		handle_thermal_trip(tz, count);
> > > > +
> > > > +update_timer:
> > > >  	monitor_thermal_zone(tz);
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(thermal_zone_device_update);
> > > > @@ -1275,6 +1301,17 @@ thermal_zone_device_register(const char
> > > > *type, int trips, int mask,
> > > >  	INIT_DELAYED_WORK(&tz->poll_queue,
> > > > thermal_zone_device_check);
> > > >  
> > > >  	thermal_zone_device_reset(tz);
> > > > +
> > > > +	/*
> > > > +	 * As .get_mode() is not guaranteed to work at this
> > > > moment, to register
> > > > +	 * a disabled thermal zone, tzp->disalbed must be set
> > > > explicitly.
> > > disalbed -> disabled
> > > 
> > > > 
> > > > 
> > > > +	 */
> > > > +	if (tz->tzp) {
> > > > +		tz->mode = tzp->disabled == true ?
> > > > THERMAL_DEVICE_DISABLED :
> > > > +						   THERMAL_DEV
> > > > ICE_
> > > > ENABLED;
> > > Your patch only fixes thermal_zone_device_register(), what about
> > > fixing [devm_]thermal_zone_of_sensor_register() which is actually
> > > used by DT thermal drivers?
> > that's next step.
> > This is a draft patch to address my proposal for the issues.
> > And the basic idea is to separate thermal_zone_device registration
> > and
> > thermal_zone_device_update() so that drivers can invoke
> > thermal_zone_device_update() (via thermal_zone_set_mode()) only
> > after
> > it's ready.
> > 
> > And the difference between your proposal and this one is that, I'd
> > like
> > to see if we can solve all the problems by implementing the "mode"
> > control in one place, aka, thermal core, instead of of_thermal and
> > any 
> > platform thermal drivers.
> Could please explain more what do you mean by that?
> 
> In my patches there is just one place for controlling 'mode'
> which is thermal_zone_set_mode() introduced in patch #1:
> 
> int thermal_zone_set_mode(struct thermal_zone_device *tz,
>                          enum thermal_device_mode mode)
> {
>        return tz->ops->set_mode(tz, mode);
> }
> 
> and it can be easily later evolved to be more complex one
> (i.e. one in your patch).
> 
> Moreover patch #4 moves 'mode' control out from of-thermal.c
> (present in vanilla kernel) to respective DT drivers.
> 
> Also my patchset has no functionality changes for platform
> drivers.. 
> > 
> > > 
> > > 
> > > > 
> > > > 
> > > > +	} else
> > > > +		tz->mode = THERMAL_DEVICE_ENABLED;
> > > > +
> > > >  	/* Update the new thermal zone and mark it as already
> > > > updated. */
> > > >  	if (atomic_cmpxchg(&tz->need_update, 1, 0))
> > > >  		thermal_zone_device_update(tz,
> > > > THERMAL_EVENT_UNSPECIFIED);
> > > > diff --git a/drivers/thermal/thermal_sysfs.c
> > > > b/drivers/thermal/thermal_sysfs.c
> > > > index 2241cea..8d144d4 100644
> > > > --- a/drivers/thermal/thermal_sysfs.c
> > > > +++ b/drivers/thermal/thermal_sysfs.c
> > > > @@ -49,18 +49,9 @@ static ssize_t
> > > >  mode_show(struct device *dev, struct device_attribute *attr,
> > > > char
> > > > *buf)
> > > >  {
> > > >  	struct thermal_zone_device *tz = to_thermal_zone(dev);
> > > > -	enum thermal_device_mode mode;
> > > > -	int result;
> > > > -
> > > > -	if (!tz->ops->get_mode)
> > > > -		return -EPERM;
> > > > -
> > > > -	result = tz->ops->get_mode(tz, &mode);
> > > > -	if (result)
> > > > -		return result;
> > > >  
> > > > -	return sprintf(buf, "%s\n", mode ==
> > > > THERMAL_DEVICE_ENABLED
> > > > ? "enabled"
> > > > -		       : "disabled");
> > > > +	return sprintf(buf, "%s\n", tz->mode ==
> > > > THERMAL_DEVICE_ENABLED ?
> > > > +			"enabled" : "disabled");
> > > >  }
> > > >  
> > > >  static ssize_t
> > > > @@ -68,20 +59,16 @@ mode_store(struct device *dev, struct
> > > > device_attribute *attr,
> > > >  	   const char *buf, size_t count)
> > > >  {
> > > >  	struct thermal_zone_device *tz = to_thermal_zone(dev);
> > > > -	int result;
> > > > -
> > > > -	if (!tz->ops->set_mode)
> > > > -		return -EPERM;
> > > > +	enum thermal_device_mode mode;
> > > >  
> > > >  	if (!strncmp(buf, "enabled", sizeof("enabled") - 1))
> > > > -		result = tz->ops->set_mode(tz,
> > > > THERMAL_DEVICE_ENABLED);
> > > > +		mode = THERMAL_DEVICE_ENABLED;
> > > >  	else if (!strncmp(buf, "disabled", sizeof("disabled")
> > > > -
> > > > 1))
> > > > -		result = tz->ops->set_mode(tz,
> > > > THERMAL_DEVICE_DISABLED);
> > > > +		mode = THERMAL_DEVICE_DISABLED;
> > > >  	else
> > > > -		result = -EINVAL;
> > > > +		return -EINVAL;
> > > >  
> > > > -	if (result)
> > > > -		return result;
> > > > +	thermal_zone_set_mode(tz, mode);
> > > Why is thermal_zone_set_mode() return value ignored?
> > > 
> > > > 
> > > > 
> > > >  	return count;
> > > >  }
> > > > diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> > > > index 5f4705f..67e4b5b 100644
> > > > --- a/include/linux/thermal.h
> > > > +++ b/include/linux/thermal.h
> > > > @@ -197,6 +197,7 @@ struct thermal_zone_device {
> > > >  	struct thermal_attr *trip_type_attrs;
> > > >  	struct thermal_attr *trip_hyst_attrs;
> > > >  	void *devdata;
> > > > +	enum thermal_device_mode mode;
> > > >  	int trips;
> > > >  	unsigned long trips_disabled;	/* bitmap for
> > > > disabled trips */
> > > >  	int passive_delay;
> > > > @@ -287,6 +288,9 @@ struct thermal_zone_params {
> > > >  	 */
> > > >  	bool no_hwmon;
> > > >  
> > > > +	/* a boolean to indicate if the thermal zone is
> > > > disabled
> > > > or not */
> > > > +	bool disabled;
> > > > +
> > > >  	int num_tbps;	/* Number of tbp entries */
> > > >  	struct thermal_bind_params *tbp;
> > > >  
> > > > @@ -452,6 +456,8 @@ struct thermal_zone_device
> > > > *thermal_zone_get_zone_by_name(const char *name);
> > > >  int thermal_zone_get_temp(struct thermal_zone_device *tz, int
> > > > *temp);
> > > >  int thermal_zone_get_slope(struct thermal_zone_device *tz);
> > > >  int thermal_zone_get_offset(struct thermal_zone_device *tz);
> > > > +int thermal_zone_set_mode(struct thermal_zone_device *tz,
> > > > +			enum thermal_device_mode mode);
> > > >  
> > > >  int get_tz_trend(struct thermal_zone_device *, int);
> > > >  struct thermal_instance *get_thermal_instance(struct
> > > > thermal_zone_device *,
> > > > @@ -518,6 +524,9 @@ static inline int thermal_zone_get_slope(
> > > >  static inline int thermal_zone_get_offset(
> > > >  		struct thermal_zone_device *tz)
> > > >  { return -ENODEV; }
> > > > +static inline int thermal_zone_set_mode(struct
> > > > thermal_zone_device
> > > > *tz,
> > > > +		enum thermal_device_mode mode);
> > > > +{ return -ENODEV; }
> > > >  static inline int get_tz_trend(struct thermal_zone_device *tz,
> > > > int
> > > > trip)
> > > >  { return -ENODEV; }
> > > >  static inline struct thermal_instance *
> Best regards,
> --
> Bartlomiej Zolnierkiewicz
> Samsung R&D Institute Poland
> Samsung Electronics
Bartlomiej Zolnierkiewicz Nov. 8, 2018, 4:46 p.m. UTC | #9
On 11/07/2018 06:55 PM, Zhang Rui wrote:
> On 二, 2018-11-06 at 17:01 +0100, Bartlomiej Zolnierkiewicz wrote:
>> On 11/06/2018 08:13 AM, Zhang Rui wrote:
>>>
>>> On 一, 2018-11-05 at 16:37 +0100, Bartlomiej Zolnierkiewicz wrote:
>>>>
>>>> On 11/04/2018 06:35 PM, Zhang Rui wrote:
>>>>>
>>>>>
>>>>> Thermal "mode" sysfs attribute is introduced to enable/disable
>>>>> a
>>>>> thermal
>>>>> zone, and .set_mode() callback is introduced for platform
>>>>> thermal
>>>>> driver
>>>>> to enable/disable the hardware thermal control logic.
>>>>>
>>>>> But, thermal core does not handle disabled thermal zone well
>>>>> and
>>>>> this
>>>>> brings a couple of issues.
>>>>> 1. thermal core polling timer for disabled thermal zone still
>>>>> runs.
>>>>> 2. thermal core still pokes disabled thermal zone occasionally,
>>>>> e.g. upon
>>>>>    system resume.
>>>>> 3. platform thermal driver can not register a disabled thermal
>>>>> zone.
>>>> What about DT thermal drivers?
>>>>
>>> yes, I think DT thermal drivers should also be covered.
>>> The idea is to provide a mechanism so that any platform driver can
>>> register a disabled thermal zone.
>>>
>>>>
>>>>>
>>>>>
>>>>> To fix this, a new flag 'mode' is introduced in struct
>>>>> thermal_zone_device
>>>>> to represent the thermal zone mode, and several decisions have
>>>>> been
>>>>> made
>>>> What about cleaning private 'mode' implementations in da9062-
>>>> thermal,
>>>> d8500_thermal, imx_thermal, int3400_thermal,
>>>> intel_quark_dts_thermal
>>>> and
>>>> of-thermal?
>>> yes, if we can leverage one flag, aka, the one in
>>> thermal_zone_device
>>> struct, then there is no reason to have private ones.
>>>
>>>>
>>>>>
>>>>>
>>>>> based on this flag, including
>>>>> a. check the thermal zone mode right after it's registered.
>>>>> b. skip updating thermal zone if the thermal zone is disabled.
>>>>> c. stop the polling timer when the thermal zone is disabled.
>>>>>
>>>>> TODO:
>>>>> This patch fixes issue 1 and 2, but for issue 3, for drivers
>>>>> that
>>>>> needs to
>>>>> register a disabled thermal zone, they should setup tzp-
>>>>>> disalbed
>>>> disalbed -> disabled
>>>>
>>>>>
>>>>>
>>>>> explicitly during registration and invokes
>>>>> thermal_zone_set_mode()
>>>>> when
>>>>> the hardware is ready.
>>>>>
>>>>> Signed-off-by: Zhang Rui <rui.zhang@intel.com>
>>>>> ---
>>>>>  drivers/thermal/thermal_core.c  | 39
>>>>> ++++++++++++++++++++++++++++++++++++++-
>>>>>  drivers/thermal/thermal_sysfs.c | 27 +++++++----------------
>>>>> ----
>>>>>  include/linux/thermal.h         |  9 +++++++++
>>>>>  3 files changed, 54 insertions(+), 21 deletions(-)
>>>>>
>>>>> diff --git a/drivers/thermal/thermal_core.c
>>>>> b/drivers/thermal/thermal_core.c
>>>>> index 98f02b0..65a984a 100644
>>>>> --- a/drivers/thermal/thermal_core.c
>>>>> +++ b/drivers/thermal/thermal_core.c
>>>>> @@ -305,7 +305,9 @@ static void monitor_thermal_zone(struct
>>>>> thermal_zone_device *tz)
>>>>>  {
>>>>>  	mutex_lock(&tz->lock);
>>>>>  
>>>>> -	if (tz->passive)
>>>>> +	if (tz->mode == THERMAL_DEVICE_DISABLED)
>>>>> +		thermal_zone_device_set_polling(tz, 0);
>>>>> +	else if (tz->passive)
>>>>>  		thermal_zone_device_set_polling(tz, tz-
>>>>>>
>>>>>> passive_delay);
>>>>>  	else if (tz->polling_delay)
>>>>>  		thermal_zone_device_set_polling(tz, tz-
>>>>>>
>>>>>> polling_delay);
>>>>> @@ -458,11 +460,33 @@ static void
>>>>> thermal_zone_device_reset(struct
>>>>> thermal_zone_device *tz)
>>>>>  		pos->initialized = false;
>>>>>  }
>>>>>  
>>>>> +int thermal_zone_set_mode(struct thermal_zone_device *tz,
>>>>> +				 enum thermal_device_mode
>>>>> mode)
>>>>> +{
>>>>> +	int result;
>>>>> +
>>>>> +	if (tz->mode == mode)
>>>>> +		return 0;
>>>>> +
>>>>> +	if (tz->ops->set_mode) {
>>>>> +		result = tz->ops->set_mode(tz, mode);
>>>>> +		if (result)
>>>>> +			return result;
>>>>> +	}
>>>>> +
>>>>> +	thermal_zone_device_update(tz,
>>>>> THERMAL_EVENT_UNSPECIFIED);
>>>> Keeping ->set_mode call coupled with thermal_zone_device_update()
>>>> one is problematic for drivers which use IRQ for reporting
>>>> crossing
>>>> temperature thresholds.
>>>>
>>>> The desired initialization ordering seems to be:
>>>>
>>>> * mark sensor as enabled in the core
>>>> * request the driver specific IRQ handler
>>>> * enable sensor by the driver specific method
>>>> * check the sensor by the core
>>>>
>>>> to fix all initialization ordering issues.
>>> I'm confused.
>>> I think you mean mark sensor as disabled in the first step?
>> Well, yes.. and no.. ;) Please see below.
>>
>>>
>>>>
>>>>
>>>> The problem with the "coupled" approach is that you need to
>>>> update
>>>> sensor drivers to implement tz->ops->set_mode method (+in case of
>>>> DT
>>>> drivers you need a way to implement driver specific ->set_mode
>>>> itself
>>>> because they all currently use the generic of_thermal_set_mode())
>>>> to
>>>> fix all the initialization ordering issues.
>>>>
>>>> In my patchset ->set_mode and thermal_zone_device_update()
>>>> operations
>>>> are separated so you can fix initialization ordering issues now
>>>> and
>>>> worry about implementing ->set_mode later.
>>> right.
>>> I was trying to do the same thing in this patch set, so the order
>>> would
>>> be
>>>
>>> * mark sensor as disabled during registration (so that
>>> thermal_zone_device_update() won't touch the sensor)
>>> * request the driver specific IRQ handler
>>> * enable sensor by the driver specific method
>>> * call thermal_zone_set_mode() to tell thermal core to check the
>>> sensor
>> In case of DT drivers ->set_mode in vanilla kernel translates to
>> of_thermal_set_mode():
>>
>> static int of_thermal_set_mode(struct thermal_zone_device *tz,
>> 			       enum thermal_device_mode mode)
>> {
>> 	struct __thermal_zone *data = tz->devdata;
>>
>> 	mutex_lock(&tz->lock);
>>
>> 	if (mode == THERMAL_DEVICE_ENABLED)
>> 		tz->polling_delay = data->polling_delay;
>> 	else
>> 		tz->polling_delay = 0;
>>
>> 	mutex_unlock(&tz->lock);
>>
>> 	data->mode = mode;
>> 	thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
>>
>> 	return 0;
>> }
>>
>> while the driver specific enable method is done without any
>> awareness of the thermal core, i.e.:
>>
>> 	thermal->chip->control(thermal->regs, true);
>>
>> in rockchip_thermal.c, which just operates on sensor registers.
>>
>> Please note that after above ->control() IRQs can arrive
>> immediately (which is unlikely but not impossible) and IRQ
>> handler can call thermal_zone_device_update().
>>
>> Now please see what happens with "coupled" thermal_zone_set_mode()
>> in case of DT drivers:
>>
>> * mark sensor as disabled during registration (so that
>>   thermal_zone_device_update() won't touch the sensor)
>> * request the driver specific IRQ handler
>> * thermal_zone_set_mode(enable)
>> -> call to thermal_zone_device_update() with sensor marked as
>>    enabled in the core but disabled in the hardware
>> * thermal->chip->control()
>>
>> or
>>
>> * mark sensor as disabled during registration (so that
>>   thermal_zone_device_update() won't touch the sensor)
>> * request the driver specific IRQ handler
>> * thermal->chip->control()
>> -> IRQ can happen immediately and result in call to
>>    thermal_zone_device_update() with sensor marked as
>>    disabled in the core but enabled in the hardware,
>>    this results in the thermal event not being handled
>> * thermal_zone_set_mode(enable)
>> With my patches what happens is:
>>
>> * mark sensor as disabled during registration
>>   (by current code in thermal_of_build_thermal_zone())
> 
> thanks to your detailed explanation of the problem.
> 
> The only question to me is that, in this phase,
> a. sensor is disabled from of-thermal' point of view, but thermal core
> doesn't handle "mode" properly, thermal_zone_device_update() still
> pokes the sensor.
> b. thermal_of_build_thermal_zone() invokes
> thermal_zone_device_register(), which in turn invokes
> thermal_zone_device_update().
> This does not cause real issue probably because data->ops->get_temp is
> not set at this point, but this still sounds like a problem to me,
> especially for non-DT thermal drivers.

Yes, your analysis is correct.

> Anyway, I will apply your patch set first, and bring the other
> questions to plumber.

Great, do you still have some comments to my patches? I would like to
post v3 some time soon (it is needed due to -next thermal changes: 
hisi_thermal driver update and addition of stm_thermal driver).

Best regards,
--
Bartlomiej Zolnierkiewicz
Samsung R&D Institute Poland
Samsung Electronics

> thanks,
> rui
>>  
>> * thermal_zone_set_mode(enable)
>> -> thermal_zone_device_update() not called here
>> * request the driver specific IRQ handler
>> * thermal->chip->control()
>> ->IRQ can happen here and will be processed just fine
>> * thermal_zone_device_update()
>>
>> Of course we can decide that we don't care about IRQ
>> arriving during initialization and go with "coupled"
>> thermal_zone_set_mode() but the current mainline code
>> seems to work for such unlikely cases (even if by
>> accident) so we should be careful with changing it..
>>
>>>
>>>
>>>>
>>>>
>>>>>
>>>>>
>>>>> +	return 0;
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(thermal_zone_set_mode);
>>>>> +
>>>>>  void thermal_zone_device_update(struct thermal_zone_device
>>>>> *tz,
>>>>>  				enum thermal_notify_event
>>>>> event)
>>>>>  {
>>>>>  	int count;
>>>>>  
>>>>> +	if (tz->mode == THERMAL_DEVICE_DISABLED)
>>>>> +		goto update_timer;
>>>>> +
>>>>>  	if (atomic_read(&in_suspend))
>>>>>  		return;
>>>>>  
>>>>> @@ -477,6 +501,8 @@ void thermal_zone_device_update(struct
>>>>> thermal_zone_device *tz,
>>>>>  
>>>>>  	for (count = 0; count < tz->trips; count++)
>>>>>  		handle_thermal_trip(tz, count);
>>>>> +
>>>>> +update_timer:
>>>>>  	monitor_thermal_zone(tz);
>>>>>  }
>>>>>  EXPORT_SYMBOL_GPL(thermal_zone_device_update);
>>>>> @@ -1275,6 +1301,17 @@ thermal_zone_device_register(const char
>>>>> *type, int trips, int mask,
>>>>>  	INIT_DELAYED_WORK(&tz->poll_queue,
>>>>> thermal_zone_device_check);
>>>>>  
>>>>>  	thermal_zone_device_reset(tz);
>>>>> +
>>>>> +	/*
>>>>> +	 * As .get_mode() is not guaranteed to work at this
>>>>> moment, to register
>>>>> +	 * a disabled thermal zone, tzp->disalbed must be set
>>>>> explicitly.
>>>> disalbed -> disabled
>>>>
>>>>>
>>>>>
>>>>> +	 */
>>>>> +	if (tz->tzp) {
>>>>> +		tz->mode = tzp->disabled == true ?
>>>>> THERMAL_DEVICE_DISABLED :
>>>>> +						   THERMAL_DEV
>>>>> ICE_
>>>>> ENABLED;
>>>> Your patch only fixes thermal_zone_device_register(), what about
>>>> fixing [devm_]thermal_zone_of_sensor_register() which is actually
>>>> used by DT thermal drivers?
>>> that's next step.
>>> This is a draft patch to address my proposal for the issues.
>>> And the basic idea is to separate thermal_zone_device registration
>>> and
>>> thermal_zone_device_update() so that drivers can invoke
>>> thermal_zone_device_update() (via thermal_zone_set_mode()) only
>>> after
>>> it's ready.
>>>
>>> And the difference between your proposal and this one is that, I'd
>>> like
>>> to see if we can solve all the problems by implementing the "mode"
>>> control in one place, aka, thermal core, instead of of_thermal and
>>> any 
>>> platform thermal drivers.
>> Could please explain more what do you mean by that?
>>
>> In my patches there is just one place for controlling 'mode'
>> which is thermal_zone_set_mode() introduced in patch #1:
>>
>> int thermal_zone_set_mode(struct thermal_zone_device *tz,
>>                          enum thermal_device_mode mode)
>> {
>>        return tz->ops->set_mode(tz, mode);
>> }
>>
>> and it can be easily later evolved to be more complex one
>> (i.e. one in your patch).
>>
>> Moreover patch #4 moves 'mode' control out from of-thermal.c
>> (present in vanilla kernel) to respective DT drivers.
>>
>> Also my patchset has no functionality changes for platform
>> drivers.. 
>>>
>>>>
>>>>
>>>>>
>>>>>
>>>>> +	} else
>>>>> +		tz->mode = THERMAL_DEVICE_ENABLED;
>>>>> +
>>>>>  	/* Update the new thermal zone and mark it as already
>>>>> updated. */
>>>>>  	if (atomic_cmpxchg(&tz->need_update, 1, 0))
>>>>>  		thermal_zone_device_update(tz,
>>>>> THERMAL_EVENT_UNSPECIFIED);
>>>>> diff --git a/drivers/thermal/thermal_sysfs.c
>>>>> b/drivers/thermal/thermal_sysfs.c
>>>>> index 2241cea..8d144d4 100644
>>>>> --- a/drivers/thermal/thermal_sysfs.c
>>>>> +++ b/drivers/thermal/thermal_sysfs.c
>>>>> @@ -49,18 +49,9 @@ static ssize_t
>>>>>  mode_show(struct device *dev, struct device_attribute *attr,
>>>>> char
>>>>> *buf)
>>>>>  {
>>>>>  	struct thermal_zone_device *tz = to_thermal_zone(dev);
>>>>> -	enum thermal_device_mode mode;
>>>>> -	int result;
>>>>> -
>>>>> -	if (!tz->ops->get_mode)
>>>>> -		return -EPERM;
>>>>> -
>>>>> -	result = tz->ops->get_mode(tz, &mode);
>>>>> -	if (result)
>>>>> -		return result;
>>>>>  
>>>>> -	return sprintf(buf, "%s\n", mode ==
>>>>> THERMAL_DEVICE_ENABLED
>>>>> ? "enabled"
>>>>> -		       : "disabled");
>>>>> +	return sprintf(buf, "%s\n", tz->mode ==
>>>>> THERMAL_DEVICE_ENABLED ?
>>>>> +			"enabled" : "disabled");
>>>>>  }
>>>>>  
>>>>>  static ssize_t
>>>>> @@ -68,20 +59,16 @@ mode_store(struct device *dev, struct
>>>>> device_attribute *attr,
>>>>>  	   const char *buf, size_t count)
>>>>>  {
>>>>>  	struct thermal_zone_device *tz = to_thermal_zone(dev);
>>>>> -	int result;
>>>>> -
>>>>> -	if (!tz->ops->set_mode)
>>>>> -		return -EPERM;
>>>>> +	enum thermal_device_mode mode;
>>>>>  
>>>>>  	if (!strncmp(buf, "enabled", sizeof("enabled") - 1))
>>>>> -		result = tz->ops->set_mode(tz,
>>>>> THERMAL_DEVICE_ENABLED);
>>>>> +		mode = THERMAL_DEVICE_ENABLED;
>>>>>  	else if (!strncmp(buf, "disabled", sizeof("disabled")
>>>>> -
>>>>> 1))
>>>>> -		result = tz->ops->set_mode(tz,
>>>>> THERMAL_DEVICE_DISABLED);
>>>>> +		mode = THERMAL_DEVICE_DISABLED;
>>>>>  	else
>>>>> -		result = -EINVAL;
>>>>> +		return -EINVAL;
>>>>>  
>>>>> -	if (result)
>>>>> -		return result;
>>>>> +	thermal_zone_set_mode(tz, mode);
>>>> Why is thermal_zone_set_mode() return value ignored?
>>>>
>>>>>
>>>>>
>>>>>  	return count;
>>>>>  }
>>>>> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
>>>>> index 5f4705f..67e4b5b 100644
>>>>> --- a/include/linux/thermal.h
>>>>> +++ b/include/linux/thermal.h
>>>>> @@ -197,6 +197,7 @@ struct thermal_zone_device {
>>>>>  	struct thermal_attr *trip_type_attrs;
>>>>>  	struct thermal_attr *trip_hyst_attrs;
>>>>>  	void *devdata;
>>>>> +	enum thermal_device_mode mode;
>>>>>  	int trips;
>>>>>  	unsigned long trips_disabled;	/* bitmap for
>>>>> disabled trips */
>>>>>  	int passive_delay;
>>>>> @@ -287,6 +288,9 @@ struct thermal_zone_params {
>>>>>  	 */
>>>>>  	bool no_hwmon;
>>>>>  
>>>>> +	/* a boolean to indicate if the thermal zone is
>>>>> disabled
>>>>> or not */
>>>>> +	bool disabled;
>>>>> +
>>>>>  	int num_tbps;	/* Number of tbp entries */
>>>>>  	struct thermal_bind_params *tbp;
>>>>>  
>>>>> @@ -452,6 +456,8 @@ struct thermal_zone_device
>>>>> *thermal_zone_get_zone_by_name(const char *name);
>>>>>  int thermal_zone_get_temp(struct thermal_zone_device *tz, int
>>>>> *temp);
>>>>>  int thermal_zone_get_slope(struct thermal_zone_device *tz);
>>>>>  int thermal_zone_get_offset(struct thermal_zone_device *tz);
>>>>> +int thermal_zone_set_mode(struct thermal_zone_device *tz,
>>>>> +			enum thermal_device_mode mode);
>>>>>  
>>>>>  int get_tz_trend(struct thermal_zone_device *, int);
>>>>>  struct thermal_instance *get_thermal_instance(struct
>>>>> thermal_zone_device *,
>>>>> @@ -518,6 +524,9 @@ static inline int thermal_zone_get_slope(
>>>>>  static inline int thermal_zone_get_offset(
>>>>>  		struct thermal_zone_device *tz)
>>>>>  { return -ENODEV; }
>>>>> +static inline int thermal_zone_set_mode(struct
>>>>> thermal_zone_device
>>>>> *tz,
>>>>> +		enum thermal_device_mode mode);
>>>>> +{ return -ENODEV; }
>>>>>  static inline int get_tz_trend(struct thermal_zone_device *tz,
>>>>> int
>>>>> trip)
>>>>>  { return -ENODEV; }
>>>>>  static inline struct thermal_instance *
diff mbox series

Patch

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 98f02b0..65a984a 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -305,7 +305,9 @@  static void monitor_thermal_zone(struct thermal_zone_device *tz)
 {
 	mutex_lock(&tz->lock);
 
-	if (tz->passive)
+	if (tz->mode == THERMAL_DEVICE_DISABLED)
+		thermal_zone_device_set_polling(tz, 0);
+	else if (tz->passive)
 		thermal_zone_device_set_polling(tz, tz->passive_delay);
 	else if (tz->polling_delay)
 		thermal_zone_device_set_polling(tz, tz->polling_delay);
@@ -458,11 +460,33 @@  static void thermal_zone_device_reset(struct thermal_zone_device *tz)
 		pos->initialized = false;
 }
 
+int thermal_zone_set_mode(struct thermal_zone_device *tz,
+				 enum thermal_device_mode mode)
+{
+	int result;
+
+	if (tz->mode == mode)
+		return 0;
+
+	if (tz->ops->set_mode) {
+		result = tz->ops->set_mode(tz, mode);
+		if (result)
+			return result;
+	}
+
+	thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(thermal_zone_set_mode);
+
 void thermal_zone_device_update(struct thermal_zone_device *tz,
 				enum thermal_notify_event event)
 {
 	int count;
 
+	if (tz->mode == THERMAL_DEVICE_DISABLED)
+		goto update_timer;
+
 	if (atomic_read(&in_suspend))
 		return;
 
@@ -477,6 +501,8 @@  void thermal_zone_device_update(struct thermal_zone_device *tz,
 
 	for (count = 0; count < tz->trips; count++)
 		handle_thermal_trip(tz, count);
+
+update_timer:
 	monitor_thermal_zone(tz);
 }
 EXPORT_SYMBOL_GPL(thermal_zone_device_update);
@@ -1275,6 +1301,17 @@  thermal_zone_device_register(const char *type, int trips, int mask,
 	INIT_DELAYED_WORK(&tz->poll_queue, thermal_zone_device_check);
 
 	thermal_zone_device_reset(tz);
+
+	/*
+	 * As .get_mode() is not guaranteed to work at this moment, to register
+	 * a disabled thermal zone, tzp->disalbed must be set explicitly.
+	 */
+	if (tz->tzp) {
+		tz->mode = tzp->disabled == true ? THERMAL_DEVICE_DISABLED :
+						   THERMAL_DEVICE_ENABLED;
+	} else
+		tz->mode = THERMAL_DEVICE_ENABLED;
+
 	/* Update the new thermal zone and mark it as already updated. */
 	if (atomic_cmpxchg(&tz->need_update, 1, 0))
 		thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
index 2241cea..8d144d4 100644
--- a/drivers/thermal/thermal_sysfs.c
+++ b/drivers/thermal/thermal_sysfs.c
@@ -49,18 +49,9 @@  static ssize_t
 mode_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct thermal_zone_device *tz = to_thermal_zone(dev);
-	enum thermal_device_mode mode;
-	int result;
-
-	if (!tz->ops->get_mode)
-		return -EPERM;
-
-	result = tz->ops->get_mode(tz, &mode);
-	if (result)
-		return result;
 
-	return sprintf(buf, "%s\n", mode == THERMAL_DEVICE_ENABLED ? "enabled"
-		       : "disabled");
+	return sprintf(buf, "%s\n", tz->mode == THERMAL_DEVICE_ENABLED ?
+			"enabled" : "disabled");
 }
 
 static ssize_t
@@ -68,20 +59,16 @@  mode_store(struct device *dev, struct device_attribute *attr,
 	   const char *buf, size_t count)
 {
 	struct thermal_zone_device *tz = to_thermal_zone(dev);
-	int result;
-
-	if (!tz->ops->set_mode)
-		return -EPERM;
+	enum thermal_device_mode mode;
 
 	if (!strncmp(buf, "enabled", sizeof("enabled") - 1))
-		result = tz->ops->set_mode(tz, THERMAL_DEVICE_ENABLED);
+		mode = THERMAL_DEVICE_ENABLED;
 	else if (!strncmp(buf, "disabled", sizeof("disabled") - 1))
-		result = tz->ops->set_mode(tz, THERMAL_DEVICE_DISABLED);
+		mode = THERMAL_DEVICE_DISABLED;
 	else
-		result = -EINVAL;
+		return -EINVAL;
 
-	if (result)
-		return result;
+	thermal_zone_set_mode(tz, mode);
 
 	return count;
 }
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 5f4705f..67e4b5b 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -197,6 +197,7 @@  struct thermal_zone_device {
 	struct thermal_attr *trip_type_attrs;
 	struct thermal_attr *trip_hyst_attrs;
 	void *devdata;
+	enum thermal_device_mode mode;
 	int trips;
 	unsigned long trips_disabled;	/* bitmap for disabled trips */
 	int passive_delay;
@@ -287,6 +288,9 @@  struct thermal_zone_params {
 	 */
 	bool no_hwmon;
 
+	/* a boolean to indicate if the thermal zone is disabled or not */
+	bool disabled;
+
 	int num_tbps;	/* Number of tbp entries */
 	struct thermal_bind_params *tbp;
 
@@ -452,6 +456,8 @@  struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name);
 int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp);
 int thermal_zone_get_slope(struct thermal_zone_device *tz);
 int thermal_zone_get_offset(struct thermal_zone_device *tz);
+int thermal_zone_set_mode(struct thermal_zone_device *tz,
+			enum thermal_device_mode mode);
 
 int get_tz_trend(struct thermal_zone_device *, int);
 struct thermal_instance *get_thermal_instance(struct thermal_zone_device *,
@@ -518,6 +524,9 @@  static inline int thermal_zone_get_slope(
 static inline int thermal_zone_get_offset(
 		struct thermal_zone_device *tz)
 { return -ENODEV; }
+static inline int thermal_zone_set_mode(struct thermal_zone_device *tz,
+		enum thermal_device_mode mode);
+{ return -ENODEV; }
 static inline int get_tz_trend(struct thermal_zone_device *tz, int trip)
 { return -ENODEV; }
 static inline struct thermal_instance *