diff mbox series

[v2] arm64: dts: rockchip: enable built-in thermal monitoring on rk3588

Message ID 20240109192608.5981-1-alchark@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v2] arm64: dts: rockchip: enable built-in thermal monitoring on rk3588 | expand

Commit Message

Alexey Charkov Jan. 9, 2024, 7:19 p.m. UTC
Include thermal zones information in device tree for rk3588 variants
and enable the built-in thermal sensing ADC on RADXA Rock 5B

Signed-off-by: Alexey Charkov <alchark@gmail.com>
---
Changes in v2:
 - Dropped redundant comments
 - Included all CPU cores in cooling maps
 - Split cooling maps into more granular ones utilizing TSADC
   channels 1-3 which measure temperature by separate CPU clusters
   instead of channel 0 which measures the center of the SoC die
---
 .../boot/dts/rockchip/rk3588-rock-5b.dts      |   4 +
 arch/arm64/boot/dts/rockchip/rk3588s.dtsi     | 151 ++++++++++++++++++
 2 files changed, 155 insertions(+)

Comments

Dragan Simic Jan. 18, 2024, 7:20 p.m. UTC | #1
On 2024-01-09 20:19, Alexey Charkov wrote:
> Include thermal zones information in device tree for rk3588 variants
> and enable the built-in thermal sensing ADC on RADXA Rock 5B
> 
> Signed-off-by: Alexey Charkov <alchark@gmail.com>
> ---
> Changes in v2:
>  - Dropped redundant comments
>  - Included all CPU cores in cooling maps
>  - Split cooling maps into more granular ones utilizing TSADC
>    channels 1-3 which measure temperature by separate CPU clusters
>    instead of channel 0 which measures the center of the SoC die
> ---
>  .../boot/dts/rockchip/rk3588-rock-5b.dts      |   4 +
>  arch/arm64/boot/dts/rockchip/rk3588s.dtsi     | 151 ++++++++++++++++++
>  2 files changed, 155 insertions(+)
> 
> diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
> b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
> index a5a104131403..f9d540000de3 100644
> --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
> +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
> @@ -772,3 +772,7 @@ &usb_host1_ehci {
>  &usb_host1_ohci {
>  	status = "okay";
>  };
> +
> +&tsadc {
> +	status = "okay";
> +};

I keep forgetting to note that enabling it for the Rock 5B should
be performed in a separate patch.

> diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> index 8aa0499f9b03..8d54998d0ecc 100644
> --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> @@ -10,6 +10,7 @@
>  #include <dt-bindings/reset/rockchip,rk3588-cru.h>
>  #include <dt-bindings/phy/phy.h>
>  #include <dt-bindings/ata/ahci.h>
> +#include <dt-bindings/thermal/thermal.h>
> 
>  / {
>  	compatible = "rockchip,rk3588";
> @@ -2112,6 +2113,156 @@ tsadc: tsadc@fec00000 {
>  		status = "disabled";
>  	};
> 
> +	thermal_zones: thermal-zones {
> +		/* sensor near the center of the whole chip */
> +		soc_thermal: soc-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;
> +			sustainable-power = <2100>;
> +			thermal-sensors = <&tsadc 0>;
> +
> +			trips {
> +				soc_crit: soc-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};
> +		};

As already noted in my previous response, perhaps it would be
better to name it package_thermal instead.  That way, it should
be more self-descriptive.

> +		/* sensor between A76 cores 0 and 1 */
> +		bigcore0_thermal: bigcore0-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;
> +			thermal-sensors = <&tsadc 1>;
> +
> +			trips {
> +				bigcore0_alert: bigcore0-alert {
> +					temperature = <85000>;
> +					hysteresis = <2000>;
> +					type = "passive";
> +				};
> +				bigcore0_crit: bigcore0-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};

As already noted in my previous message, perhaps another trip,
of the "hot" type, should be added here.

> +			cooling-maps {
> +				map0 {
> +					trip = <&bigcore0_alert>;
> +					cooling-device =
> +						<&cpu_b0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_b1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
> +					contribution = <1024>;
> +				};
> +			};
> +		};
> +
> +		/* sensor between A76 cores 2 and 3 */
> +		bigcore2_thermal: bigcore2-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;
> +			thermal-sensors = <&tsadc 2>;
> +
> +			trips {
> +				bigcore2_alert: bigcore2-alert {
> +					temperature = <85000>;
> +					hysteresis = <2000>;
> +					type = "passive";
> +				};
> +				bigcore2_crit: bigcore2-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};

The same suggestion about one more "hot" trip applies here as well.

> +			cooling-maps {
> +				map1 {
> +					trip = <&bigcore2_alert>;
> +					cooling-device =
> +						<&cpu_b2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_b3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
> +					contribution = <1024>;
> +				};
> +			};
> +		};
> +
> +		/* sensor between the four A55 cores */
> +		little_core_thermal: littlecore-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;
> +			thermal-sensors = <&tsadc 3>;
> +
> +			trips {
> +				littlecore_alert: littlecore-alert {
> +					temperature = <85000>;
> +					hysteresis = <2000>;
> +					type = "passive";
> +				};
> +				littlecore_crit: littlecore-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};

The same suggestion about one more "hot" trip applies here as well.

> +			cooling-maps {
> +				map2 {
> +					trip = <&littlecore_alert>;
> +					cooling-device =
> +						<&cpu_l0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_l1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_l2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_l3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
> +					contribution = <1024>;
> +				};
> +			};
> +		};
> +
> +		/* sensor near the PD_CENTER power domain */
> +		center_thermal: center-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;
> +			thermal-sensors = <&tsadc 4>;
> +
> +			trips {
> +				center_crit: center-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};
> +		};
> +
> +		gpu_thermal: gpu-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;
> +			thermal-sensors = <&tsadc 5>;
> +
> +			trips {
> +				gpu_crit: gpu-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};
> +		};
> +
> +		npu_thermal: npu-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;
> +			thermal-sensors = <&tsadc 6>;
> +
> +			trips {
> +				npu_crit: npu-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};
> +		};
> +	};
> +
>  	saradc: adc@fec10000 {
>  		compatible = "rockchip,rk3588-saradc";
>  		reg = <0x0 0xfec10000 0x0 0x10000>;
Heiko Stuebner Jan. 19, 2024, 1:15 p.m. UTC | #2
Am Dienstag, 9. Januar 2024, 20:19:47 CET schrieb Alexey Charkov:
> Include thermal zones information in device tree for rk3588 variants
> and enable the built-in thermal sensing ADC on RADXA Rock 5B
> 
> Signed-off-by: Alexey Charkov <alchark@gmail.com>
> ---
> Changes in v2:
>  - Dropped redundant comments
>  - Included all CPU cores in cooling maps
>  - Split cooling maps into more granular ones utilizing TSADC
>    channels 1-3 which measure temperature by separate CPU clusters
>    instead of channel 0 which measures the center of the SoC die

all of what Dragan wrote and additionally,
please don't post v2 patches as reply to earlier versions.
It confuses tooling like "b4" when trying to retrieve patches.


Thanks
Heiko

> ---
>  .../boot/dts/rockchip/rk3588-rock-5b.dts      |   4 +
>  arch/arm64/boot/dts/rockchip/rk3588s.dtsi     | 151 ++++++++++++++++++
>  2 files changed, 155 insertions(+)
> 
> diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
> index a5a104131403..f9d540000de3 100644
> --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
> +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
> @@ -772,3 +772,7 @@ &usb_host1_ehci {
>  &usb_host1_ohci {
>  	status = "okay";
>  };
> +
> +&tsadc {
> +	status = "okay";
> +};
> diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> index 8aa0499f9b03..8d54998d0ecc 100644
> --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> @@ -10,6 +10,7 @@
>  #include <dt-bindings/reset/rockchip,rk3588-cru.h>
>  #include <dt-bindings/phy/phy.h>
>  #include <dt-bindings/ata/ahci.h>
> +#include <dt-bindings/thermal/thermal.h>
>  
>  / {
>  	compatible = "rockchip,rk3588";
> @@ -2112,6 +2113,156 @@ tsadc: tsadc@fec00000 {
>  		status = "disabled";
>  	};
>  
> +	thermal_zones: thermal-zones {
> +		/* sensor near the center of the whole chip */
> +		soc_thermal: soc-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;
> +			sustainable-power = <2100>;
> +			thermal-sensors = <&tsadc 0>;
> +
> +			trips {
> +				soc_crit: soc-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};
> +		};
> +
> +		/* sensor between A76 cores 0 and 1 */
> +		bigcore0_thermal: bigcore0-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;
> +			thermal-sensors = <&tsadc 1>;
> +
> +			trips {
> +				bigcore0_alert: bigcore0-alert {
> +					temperature = <85000>;
> +					hysteresis = <2000>;
> +					type = "passive";
> +				};
> +				bigcore0_crit: bigcore0-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};
> +			cooling-maps {
> +				map0 {
> +					trip = <&bigcore0_alert>;
> +					cooling-device =
> +						<&cpu_b0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_b1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
> +					contribution = <1024>;
> +				};
> +			};
> +		};
> +
> +		/* sensor between A76 cores 2 and 3 */
> +		bigcore2_thermal: bigcore2-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;
> +			thermal-sensors = <&tsadc 2>;
> +
> +			trips {
> +				bigcore2_alert: bigcore2-alert {
> +					temperature = <85000>;
> +					hysteresis = <2000>;
> +					type = "passive";
> +				};
> +				bigcore2_crit: bigcore2-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};
> +			cooling-maps {
> +				map1 {
> +					trip = <&bigcore2_alert>;
> +					cooling-device =
> +						<&cpu_b2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_b3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
> +					contribution = <1024>;
> +				};
> +			};
> +		};
> +
> +		/* sensor between the four A55 cores */
> +		little_core_thermal: littlecore-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;
> +			thermal-sensors = <&tsadc 3>;
> +
> +			trips {
> +				littlecore_alert: littlecore-alert {
> +					temperature = <85000>;
> +					hysteresis = <2000>;
> +					type = "passive";
> +				};
> +				littlecore_crit: littlecore-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};
> +			cooling-maps {
> +				map2 {
> +					trip = <&littlecore_alert>;
> +					cooling-device =
> +						<&cpu_l0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_l1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_l2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_l3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
> +					contribution = <1024>;
> +				};
> +			};
> +		};
> +
> +		/* sensor near the PD_CENTER power domain */
> +		center_thermal: center-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;
> +			thermal-sensors = <&tsadc 4>;
> +
> +			trips {
> +				center_crit: center-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};
> +		};
> +
> +		gpu_thermal: gpu-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;
> +			thermal-sensors = <&tsadc 5>;
> +
> +			trips {
> +				gpu_crit: gpu-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};
> +		};
> +
> +		npu_thermal: npu-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;
> +			thermal-sensors = <&tsadc 6>;
> +
> +			trips {
> +				npu_crit: npu-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};
> +		};
> +	};
> +
>  	saradc: adc@fec10000 {
>  		compatible = "rockchip,rk3588-saradc";
>  		reg = <0x0 0xfec10000 0x0 0x10000>;
>
Daniel Lezcano Jan. 19, 2024, 4:21 p.m. UTC | #3
On 09/01/2024 20:19, Alexey Charkov wrote:
> Include thermal zones information in device tree for rk3588 variants
> and enable the built-in thermal sensing ADC on RADXA Rock 5B
> 
> Signed-off-by: Alexey Charkov <alchark@gmail.com>
> ---
> Changes in v2:
>   - Dropped redundant comments
>   - Included all CPU cores in cooling maps
>   - Split cooling maps into more granular ones utilizing TSADC
>     channels 1-3 which measure temperature by separate CPU clusters
>     instead of channel 0 which measures the center of the SoC die
> ---
>   .../boot/dts/rockchip/rk3588-rock-5b.dts      |   4 +
>   arch/arm64/boot/dts/rockchip/rk3588s.dtsi     | 151 ++++++++++++++++++
>   2 files changed, 155 insertions(+)
> 
> diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
> index a5a104131403..f9d540000de3 100644
> --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
> +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
> @@ -772,3 +772,7 @@ &usb_host1_ehci {
>   &usb_host1_ohci {
>   	status = "okay";
>   };
> +
> +&tsadc {
> +	status = "okay";
> +};
> diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> index 8aa0499f9b03..8d54998d0ecc 100644
> --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> @@ -10,6 +10,7 @@
>   #include <dt-bindings/reset/rockchip,rk3588-cru.h>
>   #include <dt-bindings/phy/phy.h>
>   #include <dt-bindings/ata/ahci.h>
> +#include <dt-bindings/thermal/thermal.h>
>   
>   / {
>   	compatible = "rockchip,rk3588";
> @@ -2112,6 +2113,156 @@ tsadc: tsadc@fec00000 {
>   		status = "disabled";
>   	};
>   
> +	thermal_zones: thermal-zones {
> +		/* sensor near the center of the whole chip */
> +		soc_thermal: soc-thermal {
> +			polling-delay-passive = <20>;

There is no mitigation set for this thermal zone. It is pointless to
specify a passive polling delay.

> +			polling-delay = <1000>;

The driver is interrupt driven. No need to poll.

> +			sustainable-power = <2100>;

There is no mitigation with this thermal zone. Specifying a sustainable 
power does not make sense.

> +			thermal-sensors = <&tsadc 0>;
> +
> +			trips {
> +				soc_crit: soc-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;

This trip point leads to a system shutdown / reboot. It is not necessary 
to specify a hysteresis.

> +					type = "critical";
> +				};
> +			};
> +		};
> +
> +		/* sensor between A76 cores 0 and 1 */
> +		bigcore0_thermal: bigcore0-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;

The driver is interrupt driven. No need to poll.

> +			thermal-sensors = <&tsadc 1>;
> +
> +			trips {
> +				bigcore0_alert: bigcore0-alert {
> +					temperature = <85000>;
> +					hysteresis = <2000>;
> +					type = "passive";
> +				};
> +				bigcore0_crit: bigcore0-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};
> +			cooling-maps {
> +				map0 {
> +					trip = <&bigcore0_alert>;
> +					cooling-device =
> +						<&cpu_b0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_b1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
> +					contribution = <1024>;

If you specify the contribution, that means it is expected to use the 
IPA governor. However, this one needs an extra trip point before 'alert' 
to begin collecting temperatures in order to initialize the PID loop of 
the IPA.

> +				};
> +			};
> +		};
> +
> +		/* sensor between A76 cores 2 and 3 */
> +		bigcore2_thermal: bigcore2-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;

The driver is interrupt driven. No need to poll.

> +			thermal-sensors = <&tsadc 2>;
> +
> +			trips {
> +				bigcore2_alert: bigcore2-alert {
> +					temperature = <85000>;
> +					hysteresis = <2000>;
> +					type = "passive";
> +				};
> +				bigcore2_crit: bigcore2-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};
> +			cooling-maps {
> +				map1 {
> +					trip = <&bigcore2_alert>;
> +					cooling-device =
> +						<&cpu_b2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_b3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
> +					contribution = <1024>;
> +				};
> +			};
> +		};
> +
> +		/* sensor between the four A55 cores */
> +		little_core_thermal: littlecore-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;

The driver is interrupt driven. No need to poll.

> +			thermal-sensors = <&tsadc 3>;
> +
> +			trips {
> +				littlecore_alert: littlecore-alert {
> +					temperature = <85000>;
> +					hysteresis = <2000>;
> +					type = "passive";
> +				};
> +				littlecore_crit: littlecore-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};
> +			cooling-maps {
> +				map2 {
> +					trip = <&littlecore_alert>;
> +					cooling-device =
> +						<&cpu_l0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_l1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_l2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> +						<&cpu_l3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
> +					contribution = <1024>;
> +				};
> +			};
> +		};
> +
> +		/* sensor near the PD_CENTER power domain */
> +		center_thermal: center-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;

Same comment as above for "soc-thermal"

> +			thermal-sensors = <&tsadc 4>;
> +
> +			trips {
> +				center_crit: center-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};
> +		};
> +
> +		gpu_thermal: gpu-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;

Same comment as above for "soc-thermal"



> +			thermal-sensors = <&tsadc 5>;
> +
> +			trips {
> +				gpu_crit: gpu-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};
> +		};
> +
> +		npu_thermal: npu-thermal {
> +			polling-delay-passive = <20>;
> +			polling-delay = <1000>;

Same comment as above for "soc-thermal"


> +			thermal-sensors = <&tsadc 6>;
> +
> +			trips {
> +				npu_crit: npu-crit {
> +					temperature = <115000>;
> +					hysteresis = <2000>;
> +					type = "critical";
> +				};
> +			};
> +		};
> +	};
> +
>   	saradc: adc@fec10000 {
>   		compatible = "rockchip,rk3588-saradc";
>   		reg = <0x0 0xfec10000 0x0 0x10000>;
Alexey Charkov Jan. 21, 2024, 7:57 p.m. UTC | #4
On Fri, Jan 19, 2024 at 8:21 PM Daniel Lezcano
<daniel.lezcano@linaro.org> wrote:
Hello Daniel,

Thanks a lot for your review and comments! Please see some reflections below.

> On 09/01/2024 20:19, Alexey Charkov wrote:
> > Include thermal zones information in device tree for rk3588 variants
> > and enable the built-in thermal sensing ADC on RADXA Rock 5B
> >
> > Signed-off-by: Alexey Charkov <alchark@gmail.com>
> > ---
> > Changes in v2:
> >   - Dropped redundant comments
> >   - Included all CPU cores in cooling maps
> >   - Split cooling maps into more granular ones utilizing TSADC
> >     channels 1-3 which measure temperature by separate CPU clusters
> >     instead of channel 0 which measures the center of the SoC die
> > ---
> >   .../boot/dts/rockchip/rk3588-rock-5b.dts      |   4 +
> >   arch/arm64/boot/dts/rockchip/rk3588s.dtsi     | 151 ++++++++++++++++++
> >   2 files changed, 155 insertions(+)
> >
> > diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
> > index a5a104131403..f9d540000de3 100644
> > --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
> > +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
> > @@ -772,3 +772,7 @@ &usb_host1_ehci {
> >   &usb_host1_ohci {
> >       status = "okay";
> >   };
> > +
> > +&tsadc {
> > +     status = "okay";
> > +};
> > diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> > index 8aa0499f9b03..8d54998d0ecc 100644
> > --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> > +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> > @@ -10,6 +10,7 @@
> >   #include <dt-bindings/reset/rockchip,rk3588-cru.h>
> >   #include <dt-bindings/phy/phy.h>
> >   #include <dt-bindings/ata/ahci.h>
> > +#include <dt-bindings/thermal/thermal.h>
> >
> >   / {
> >       compatible = "rockchip,rk3588";
> > @@ -2112,6 +2113,156 @@ tsadc: tsadc@fec00000 {
> >               status = "disabled";
> >       };
> >
> > +     thermal_zones: thermal-zones {
> > +             /* sensor near the center of the whole chip */
> > +             soc_thermal: soc-thermal {
> > +                     polling-delay-passive = <20>;
>
> There is no mitigation set for this thermal zone. It is pointless to
> specify a passive polling.

Indeed, it makes sense to me. There seems to be a catch though in that
the driver calls the generic thermal_of_zone_register during the
initial probe, which expects both of those polling delays to be
present in the device tree, otherwise it simply refuses to add the
respective thermal zone, see drivers/thermal/thermal_of.c:502

> > +                     polling-delay = <1000>;
>
> The driver is interrupt driven. No need to poll.

Same here as above

> > +                     sustainable-power = <2100>;
>
> There is no mitigation with this thermal zone. Specifying a sustainable
> power does not make sense.

Thanks, will drop this in v3!

> > +                     thermal-sensors = <&tsadc 0>;
> > +
> > +                     trips {
> > +                             soc_crit: soc-crit {
> > +                                     temperature = <115000>;
> > +                                     hysteresis = <2000>;
>
> This trip point leads to a system shutdown / reboot. It is not necessary
> to specify a hysteresis.

Similar to the above, the generic thermal_of code refuses to add the
trip point if it has no hysteresis property defined (regardless of the
trip type), see drivers/thermal/thermal_of.c:109

> > +                                     type = "critical";
> > +                             };
> > +                     };
> > +             };
> > +
> > +             /* sensor between A76 cores 0 and 1 */
> > +             bigcore0_thermal: bigcore0-thermal {
> > +                     polling-delay-passive = <20>;
> > +                     polling-delay = <1000>;
>
> The driver is interrupt driven. No need to poll.
>
> > +                     thermal-sensors = <&tsadc 1>;
> > +
> > +                     trips {
> > +                             bigcore0_alert: bigcore0-alert {
> > +                                     temperature = <85000>;
> > +                                     hysteresis = <2000>;
> > +                                     type = "passive";
> > +                             };
> > +                             bigcore0_crit: bigcore0-crit {
> > +                                     temperature = <115000>;
> > +                                     hysteresis = <2000>;
> > +                                     type = "critical";
> > +                             };
> > +                     };
> > +                     cooling-maps {
> > +                             map0 {
> > +                                     trip = <&bigcore0_alert>;
> > +                                     cooling-device =
> > +                                             <&cpu_b0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
> > +                                             <&cpu_b1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
> > +                                     contribution = <1024>;
>
> If you specify the contribution, that means it is expected to use the
> IPA governor. However, this one needs an extra trip point before 'alert'
> to begin collecting temperatures in order to initialize the PID loop of
> the IPA.

Thank you! Will add extra passive cooling trip points at 75C to all
three CPU clusters.

Best regards,
Alexey
Daniel Lezcano Jan. 22, 2024, 12:04 a.m. UTC | #5
Hi Alexey,


On 21/01/2024 20:57, Alexey Charkov wrote:
> On Fri, Jan 19, 2024 at 8:21 PM Daniel Lezcano
> <daniel.lezcano@linaro.org> wrote:
> Hello Daniel,
> 
> Thanks a lot for your review and comments! Please see some reflections below.
> 
>> On 09/01/2024 20:19, Alexey Charkov wrote:
>>> Include thermal zones information in device tree for rk3588 variants
>>> and enable the built-in thermal sensing ADC on RADXA Rock 5B
>>>
>>> Signed-off-by: Alexey Charkov <alchark@gmail.com>
>>> ---
>>> Changes in v2:
>>>    - Dropped redundant comments
>>>    - Included all CPU cores in cooling maps
>>>    - Split cooling maps into more granular ones utilizing TSADC
>>>      channels 1-3 which measure temperature by separate CPU clusters
>>>      instead of channel 0 which measures the center of the SoC die
>>> ---
>>>    .../boot/dts/rockchip/rk3588-rock-5b.dts      |   4 +
>>>    arch/arm64/boot/dts/rockchip/rk3588s.dtsi     | 151 ++++++++++++++++++
>>>    2 files changed, 155 insertions(+)
>>>
>>> diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
>>> index a5a104131403..f9d540000de3 100644
>>> --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
>>> +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
>>> @@ -772,3 +772,7 @@ &usb_host1_ehci {
>>>    &usb_host1_ohci {
>>>        status = "okay";
>>>    };
>>> +
>>> +&tsadc {
>>> +     status = "okay";
>>> +};
>>> diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
>>> index 8aa0499f9b03..8d54998d0ecc 100644
>>> --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
>>> +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
>>> @@ -10,6 +10,7 @@
>>>    #include <dt-bindings/reset/rockchip,rk3588-cru.h>
>>>    #include <dt-bindings/phy/phy.h>
>>>    #include <dt-bindings/ata/ahci.h>
>>> +#include <dt-bindings/thermal/thermal.h>
>>>
>>>    / {
>>>        compatible = "rockchip,rk3588";
>>> @@ -2112,6 +2113,156 @@ tsadc: tsadc@fec00000 {
>>>                status = "disabled";
>>>        };
>>>
>>> +     thermal_zones: thermal-zones {
>>> +             /* sensor near the center of the whole chip */
>>> +             soc_thermal: soc-thermal {
>>> +                     polling-delay-passive = <20>;
>>
>> There is no mitigation set for this thermal zone. It is pointless to
>> specify a passive polling.
> 
> Indeed, it makes sense to me. There seems to be a catch though in that
> the driver calls the generic thermal_of_zone_register during the
> initial probe, which expects both of those polling delays to be
> present in the device tree, otherwise it simply refuses to add the
> respective thermal zone, see drivers/thermal/thermal_of.c:502

Usually:

polling-delay-passive = <0>;
polling-delay = <0>;

cf:

git grep "polling-delay = <0>" arch/arm64/boot/dts

>>> +                     polling-delay = <1000>;
>>
>> The driver is interrupt driven. No need to poll.
> 
> Same here as above
> 
>>> +                     sustainable-power = <2100>;
>>
>> There is no mitigation with this thermal zone. Specifying a sustainable
>> power does not make sense.
> 
> Thanks, will drop this in v3!
> 
>>> +                     thermal-sensors = <&tsadc 0>;
>>> +
>>> +                     trips {
>>> +                             soc_crit: soc-crit {
>>> +                                     temperature = <115000>;
>>> +                                     hysteresis = <2000>;
>>
>> This trip point leads to a system shutdown / reboot. It is not necessary
>> to specify a hysteresis.
> 
> Similar to the above, the generic thermal_of code refuses to add the
> trip point if it has no hysteresis property defined (regardless of the
> trip type), see drivers/thermal/thermal_of.c:109

hysteresis = <0>;
Alexey Charkov Jan. 22, 2024, 5:57 a.m. UTC | #6
On Mon, Jan 22, 2024 at 4:04 AM Daniel Lezcano
<daniel.lezcano@linaro.org> wrote:
>
>
> Hi Alexey,
>
>
> On 21/01/2024 20:57, Alexey Charkov wrote:
> > On Fri, Jan 19, 2024 at 8:21 PM Daniel Lezcano
> > <daniel.lezcano@linaro.org> wrote:
> > Hello Daniel,
> >
> > Thanks a lot for your review and comments! Please see some reflections below.
> >
> >> On 09/01/2024 20:19, Alexey Charkov wrote:
> >>> Include thermal zones information in device tree for rk3588 variants
> >>> and enable the built-in thermal sensing ADC on RADXA Rock 5B
> >>>
> >>> Signed-off-by: Alexey Charkov <alchark@gmail.com>
> >>> ---
> >>> Changes in v2:
> >>>    - Dropped redundant comments
> >>>    - Included all CPU cores in cooling maps
> >>>    - Split cooling maps into more granular ones utilizing TSADC
> >>>      channels 1-3 which measure temperature by separate CPU clusters
> >>>      instead of channel 0 which measures the center of the SoC die
> >>> ---
> >>>    .../boot/dts/rockchip/rk3588-rock-5b.dts      |   4 +
> >>>    arch/arm64/boot/dts/rockchip/rk3588s.dtsi     | 151 ++++++++++++++++++
> >>>    2 files changed, 155 insertions(+)
> >>>
> >>> diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
> >>> index a5a104131403..f9d540000de3 100644
> >>> --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
> >>> +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
> >>> @@ -772,3 +772,7 @@ &usb_host1_ehci {
> >>>    &usb_host1_ohci {
> >>>        status = "okay";
> >>>    };
> >>> +
> >>> +&tsadc {
> >>> +     status = "okay";
> >>> +};
> >>> diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> >>> index 8aa0499f9b03..8d54998d0ecc 100644
> >>> --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> >>> +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> >>> @@ -10,6 +10,7 @@
> >>>    #include <dt-bindings/reset/rockchip,rk3588-cru.h>
> >>>    #include <dt-bindings/phy/phy.h>
> >>>    #include <dt-bindings/ata/ahci.h>
> >>> +#include <dt-bindings/thermal/thermal.h>
> >>>
> >>>    / {
> >>>        compatible = "rockchip,rk3588";
> >>> @@ -2112,6 +2113,156 @@ tsadc: tsadc@fec00000 {
> >>>                status = "disabled";
> >>>        };
> >>>
> >>> +     thermal_zones: thermal-zones {
> >>> +             /* sensor near the center of the whole chip */
> >>> +             soc_thermal: soc-thermal {
> >>> +                     polling-delay-passive = <20>;
> >>
> >> There is no mitigation set for this thermal zone. It is pointless to
> >> specify a passive polling.
> >
> > Indeed, it makes sense to me. There seems to be a catch though in that
> > the driver calls the generic thermal_of_zone_register during the
> > initial probe, which expects both of those polling delays to be
> > present in the device tree, otherwise it simply refuses to add the
> > respective thermal zone, see drivers/thermal/thermal_of.c:502
>
> Usually:
>
> polling-delay-passive = <0>;
> polling-delay = <0>;
>
> cf:
>
> git grep "polling-delay = <0>" arch/arm64/boot/dts

Indeed, thanks a lot for the pointer! Somehow it slipped my attention.
Will test and amend accordingly.

> >>> +                     polling-delay = <1000>;
> >>
> >> The driver is interrupt driven. No need to poll.
> >
> > Same here as above
> >
> >>> +                     sustainable-power = <2100>;
> >>
> >> There is no mitigation with this thermal zone. Specifying a sustainable
> >> power does not make sense.
> >
> > Thanks, will drop this in v3!
> >
> >>> +                     thermal-sensors = <&tsadc 0>;
> >>> +
> >>> +                     trips {
> >>> +                             soc_crit: soc-crit {
> >>> +                                     temperature = <115000>;
> >>> +                                     hysteresis = <2000>;
> >>
> >> This trip point leads to a system shutdown / reboot. It is not necessary
> >> to specify a hysteresis.
> >
> > Similar to the above, the generic thermal_of code refuses to add the
> > trip point if it has no hysteresis property defined (regardless of the
> > trip type), see drivers/thermal/thermal_of.c:109
>
> hysteresis = <0>;

Makes sense, thank you! Will amend accordingly.

Best regards,
Alexey
Alexey Charkov Jan. 23, 2024, 7:47 p.m. UTC | #7
On Mon, Jan 22, 2024 at 4:04 AM Daniel Lezcano
<daniel.lezcano@linaro.org> wrote:
>
>
> Hi Alexey,
>
>
> On 21/01/2024 20:57, Alexey Charkov wrote:
> > On Fri, Jan 19, 2024 at 8:21 PM Daniel Lezcano
> > <daniel.lezcano@linaro.org> wrote:
> > Hello Daniel,
> >
> > Thanks a lot for your review and comments! Please see some reflections below.
> >
> >> On 09/01/2024 20:19, Alexey Charkov wrote:
> >>> Include thermal zones information in device tree for rk3588 variants
> >>> and enable the built-in thermal sensing ADC on RADXA Rock 5B
> >>>
> >>> Signed-off-by: Alexey Charkov <alchark@gmail.com>
> >>> ---
> >>> Changes in v2:
> >>>    - Dropped redundant comments
> >>>    - Included all CPU cores in cooling maps
> >>>    - Split cooling maps into more granular ones utilizing TSADC
> >>>      channels 1-3 which measure temperature by separate CPU clusters
> >>>      instead of channel 0 which measures the center of the SoC die
> >>> ---
> >>>    .../boot/dts/rockchip/rk3588-rock-5b.dts      |   4 +
> >>>    arch/arm64/boot/dts/rockchip/rk3588s.dtsi     | 151 ++++++++++++++++++
> >>>    2 files changed, 155 insertions(+)
> >>>
> >>> diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
> >>> index a5a104131403..f9d540000de3 100644
> >>> --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
> >>> +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
> >>> @@ -772,3 +772,7 @@ &usb_host1_ehci {
> >>>    &usb_host1_ohci {
> >>>        status = "okay";
> >>>    };
> >>> +
> >>> +&tsadc {
> >>> +     status = "okay";
> >>> +};
> >>> diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> >>> index 8aa0499f9b03..8d54998d0ecc 100644
> >>> --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> >>> +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> >>> @@ -10,6 +10,7 @@
> >>>    #include <dt-bindings/reset/rockchip,rk3588-cru.h>
> >>>    #include <dt-bindings/phy/phy.h>
> >>>    #include <dt-bindings/ata/ahci.h>
> >>> +#include <dt-bindings/thermal/thermal.h>
> >>>
> >>>    / {
> >>>        compatible = "rockchip,rk3588";
> >>> @@ -2112,6 +2113,156 @@ tsadc: tsadc@fec00000 {
> >>>                status = "disabled";
> >>>        };
> >>>
> >>> +     thermal_zones: thermal-zones {
> >>> +             /* sensor near the center of the whole chip */
> >>> +             soc_thermal: soc-thermal {
> >>> +                     polling-delay-passive = <20>;
> >>
> >> There is no mitigation set for this thermal zone. It is pointless to
> >> specify a passive polling.
> >
> > Indeed, it makes sense to me. There seems to be a catch though in that
> > the driver calls the generic thermal_of_zone_register during the
> > initial probe, which expects both of those polling delays to be
> > present in the device tree, otherwise it simply refuses to add the
> > respective thermal zone, see drivers/thermal/thermal_of.c:502
>
> Usually:
>
> polling-delay-passive = <0>;
> polling-delay = <0>;
>
> cf:
>
> git grep "polling-delay = <0>" arch/arm64/boot/dts

For some reason when I have both polling-delay-passive and
polling-delay set to 0, the active cooling map I have in my board DT
(using a PWM controlled fan) behaves weirdly.

I use the following fragment in my board DTS:

+&package_thermal {
+       trips {
+               package_fan: package-fan {
+                       temperature = <55000>;
+                       hysteresis = <2000>;
+                       type = "active";
+               };
+       };
+
+       cooling-maps {
+               map-fan {
+                       trip = <&package_fan>;
+                       cooling-device = <&fan THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+               };
+       };
+};

If I add polling-delay = <1000>; at the top, the fan speeds up and
down dynamically as the package temperature swings around 55C. If I
remove that (having set polling-delay = <0>; in rk3588s.dtsi), the fan
speeds up to the midpoint cooling state once the package temperature
approaches 55C, and then it just stays there forever: it doesn't speed
up above the midpoint even as the temperature climbs above 70C, nor
does it spin down as it falls back to around 45C.

Is that the expected behavior for when the polling is disabled?

I haven't yet studied in detail if passive cooling kicks in correctly
with polling disabled, but this behavior with active cooling left me
quite confused - any pointers would be much appreciated.

Thanks a lot,
Alexey
Daniel Lezcano Jan. 24, 2024, 12:14 a.m. UTC | #8
On 23/01/2024 20:47, Alexey Charkov wrote:
> On Mon, Jan 22, 2024 at 4:04 AM Daniel Lezcano
> <daniel.lezcano@linaro.org> wrote:
>>
>>
>> Hi Alexey,
>>
>>
>> On 21/01/2024 20:57, Alexey Charkov wrote:
>>> On Fri, Jan 19, 2024 at 8:21 PM Daniel Lezcano
>>> <daniel.lezcano@linaro.org> wrote:
>>> Hello Daniel,
>>>
>>> Thanks a lot for your review and comments! Please see some reflections below.
>>>
>>>> On 09/01/2024 20:19, Alexey Charkov wrote:
>>>>> Include thermal zones information in device tree for rk3588 variants
>>>>> and enable the built-in thermal sensing ADC on RADXA Rock 5B
>>>>>
>>>>> Signed-off-by: Alexey Charkov <alchark@gmail.com>
>>>>> ---
>>>>> Changes in v2:
>>>>>     - Dropped redundant comments
>>>>>     - Included all CPU cores in cooling maps
>>>>>     - Split cooling maps into more granular ones utilizing TSADC
>>>>>       channels 1-3 which measure temperature by separate CPU clusters
>>>>>       instead of channel 0 which measures the center of the SoC die
>>>>> ---
>>>>>     .../boot/dts/rockchip/rk3588-rock-5b.dts      |   4 +
>>>>>     arch/arm64/boot/dts/rockchip/rk3588s.dtsi     | 151 ++++++++++++++++++
>>>>>     2 files changed, 155 insertions(+)
>>>>>
>>>>> diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
>>>>> index a5a104131403..f9d540000de3 100644
>>>>> --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
>>>>> +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
>>>>> @@ -772,3 +772,7 @@ &usb_host1_ehci {
>>>>>     &usb_host1_ohci {
>>>>>         status = "okay";
>>>>>     };
>>>>> +
>>>>> +&tsadc {
>>>>> +     status = "okay";
>>>>> +};
>>>>> diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
>>>>> index 8aa0499f9b03..8d54998d0ecc 100644
>>>>> --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
>>>>> +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
>>>>> @@ -10,6 +10,7 @@
>>>>>     #include <dt-bindings/reset/rockchip,rk3588-cru.h>
>>>>>     #include <dt-bindings/phy/phy.h>
>>>>>     #include <dt-bindings/ata/ahci.h>
>>>>> +#include <dt-bindings/thermal/thermal.h>
>>>>>
>>>>>     / {
>>>>>         compatible = "rockchip,rk3588";
>>>>> @@ -2112,6 +2113,156 @@ tsadc: tsadc@fec00000 {
>>>>>                 status = "disabled";
>>>>>         };
>>>>>
>>>>> +     thermal_zones: thermal-zones {
>>>>> +             /* sensor near the center of the whole chip */
>>>>> +             soc_thermal: soc-thermal {
>>>>> +                     polling-delay-passive = <20>;
>>>>
>>>> There is no mitigation set for this thermal zone. It is pointless to
>>>> specify a passive polling.
>>>
>>> Indeed, it makes sense to me. There seems to be a catch though in that
>>> the driver calls the generic thermal_of_zone_register during the
>>> initial probe, which expects both of those polling delays to be
>>> present in the device tree, otherwise it simply refuses to add the
>>> respective thermal zone, see drivers/thermal/thermal_of.c:502
>>
>> Usually:
>>
>> polling-delay-passive = <0>;
>> polling-delay = <0>;
>>
>> cf:
>>
>> git grep "polling-delay = <0>" arch/arm64/boot/dts
> 
> For some reason when I have both polling-delay-passive and
> polling-delay set to 0, the active cooling map I have in my board DT
> (using a PWM controlled fan) behaves weirdly.



> I use the following fragment in my board DTS:
> 
> +&package_thermal {
> +       trips {
> +               package_fan: package-fan {
> +                       temperature = <55000>;
> +                       hysteresis = <2000>;
> +                       type = "active";
> +               };
> +       };
> +
> +       cooling-maps {
> +               map-fan {
> +                       trip = <&package_fan>;
> +                       cooling-device = <&fan THERMAL_NO_LIMIT
> THERMAL_NO_LIMIT>;
> +               };
> +       };
> +};
> 
> If I add polling-delay = <1000>; at the top, the fan speeds up and
> down dynamically as the package temperature swings around 55C. If I
> remove that (having set polling-delay = <0>; in rk3588s.dtsi), the fan
> speeds up to the midpoint cooling state once the package temperature
> approaches 55C, and then it just stays there forever: it doesn't speed
> up above the midpoint even as the temperature climbs above 70C, nor
> does it spin down as it falls back to around 45C.
> 
> Is that the expected behavior for when the polling is disabled?

I don't know the rest of the DT this fragment was added to, but I'm not 
surprised there is misbehavior because the configuration is not correct 
in this case.

If there is a thermal zone with an active trip and an associated cooling 
device like a fan, then:
	-> polling-delay = <a_value>;
	-> polling-delay-passive = <0>;

If there is a thermal zone with a passive cooling device, such as a cpufreq 
cooling device, then there are two cases:

  1. The sensor supports interrupt when crossing the trip point
	-> polling-delay = <0>;
	-> polling-delay-passive = <a_value>;

  2. The sensor does not support interrupt when crossing the trip point
	-> polling-delay = <a_value>;
	-> polling-delay-passive = <another_value>;

Why?

When the cooling device is a passive cooling device, the mitigation 
happens with a higher temperature sampling rate in order to change the 
state of the cooling device hundreds of times per second. With a fan, the 
cooling effect is too slow for that, so we keep the regular polling instead.


> I haven't yet studied in detail if passive cooling kicks in correctly
> with polling disabled, but this behavior with active cooling left me
> quite confused - any pointers would be much appreciated.
> 
> Thanks a lot,
> Alexey
diff mbox series

Patch

diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
index a5a104131403..f9d540000de3 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
@@ -772,3 +772,7 @@  &usb_host1_ehci {
 &usb_host1_ohci {
 	status = "okay";
 };
+
+&tsadc {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index 8aa0499f9b03..8d54998d0ecc 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -10,6 +10,7 @@ 
 #include <dt-bindings/reset/rockchip,rk3588-cru.h>
 #include <dt-bindings/phy/phy.h>
 #include <dt-bindings/ata/ahci.h>
+#include <dt-bindings/thermal/thermal.h>
 
 / {
 	compatible = "rockchip,rk3588";
@@ -2112,6 +2113,156 @@  tsadc: tsadc@fec00000 {
 		status = "disabled";
 	};
 
+	thermal_zones: thermal-zones {
+		/* sensor near the center of the whole chip */
+		soc_thermal: soc-thermal {
+			polling-delay-passive = <20>;
+			polling-delay = <1000>;
+			sustainable-power = <2100>;
+			thermal-sensors = <&tsadc 0>;
+
+			trips {
+				soc_crit: soc-crit {
+					temperature = <115000>;
+					hysteresis = <2000>;
+					type = "critical";
+				};
+			};
+		};
+
+		/* sensor between A76 cores 0 and 1 */
+		bigcore0_thermal: bigcore0-thermal {
+			polling-delay-passive = <20>;
+			polling-delay = <1000>;
+			thermal-sensors = <&tsadc 1>;
+
+			trips {
+				bigcore0_alert: bigcore0-alert {
+					temperature = <85000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				bigcore0_crit: bigcore0-crit {
+					temperature = <115000>;
+					hysteresis = <2000>;
+					type = "critical";
+				};
+			};
+			cooling-maps {
+				map0 {
+					trip = <&bigcore0_alert>;
+					cooling-device =
+						<&cpu_b0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu_b1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+					contribution = <1024>;
+				};
+			};
+		};
+
+		/* sensor between A76 cores 2 and 3 */
+		bigcore2_thermal: bigcore2-thermal {
+			polling-delay-passive = <20>;
+			polling-delay = <1000>;
+			thermal-sensors = <&tsadc 2>;
+
+			trips {
+				bigcore2_alert: bigcore2-alert {
+					temperature = <85000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				bigcore2_crit: bigcore2-crit {
+					temperature = <115000>;
+					hysteresis = <2000>;
+					type = "critical";
+				};
+			};
+			cooling-maps {
+				map1 {
+					trip = <&bigcore2_alert>;
+					cooling-device =
+						<&cpu_b2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu_b3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+					contribution = <1024>;
+				};
+			};
+		};
+
+		/* sensor between the four A55 cores */
+		little_core_thermal: littlecore-thermal {
+			polling-delay-passive = <20>;
+			polling-delay = <1000>;
+			thermal-sensors = <&tsadc 3>;
+
+			trips {
+				littlecore_alert: littlecore-alert {
+					temperature = <85000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				littlecore_crit: littlecore-crit {
+					temperature = <115000>;
+					hysteresis = <2000>;
+					type = "critical";
+				};
+			};
+			cooling-maps {
+				map2 {
+					trip = <&littlecore_alert>;
+					cooling-device =
+						<&cpu_l0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu_l1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu_l2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu_l3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+					contribution = <1024>;
+				};
+			};
+		};
+
+		/* sensor near the PD_CENTER power domain */
+		center_thermal: center-thermal {
+			polling-delay-passive = <20>;
+			polling-delay = <1000>;
+			thermal-sensors = <&tsadc 4>;
+
+			trips {
+				center_crit: center-crit {
+					temperature = <115000>;
+					hysteresis = <2000>;
+					type = "critical";
+				};
+			};
+		};
+
+		gpu_thermal: gpu-thermal {
+			polling-delay-passive = <20>;
+			polling-delay = <1000>;
+			thermal-sensors = <&tsadc 5>;
+
+			trips {
+				gpu_crit: gpu-crit {
+					temperature = <115000>;
+					hysteresis = <2000>;
+					type = "critical";
+				};
+			};
+		};
+
+		npu_thermal: npu-thermal {
+			polling-delay-passive = <20>;
+			polling-delay = <1000>;
+			thermal-sensors = <&tsadc 6>;
+
+			trips {
+				npu_crit: npu-crit {
+					temperature = <115000>;
+					hysteresis = <2000>;
+					type = "critical";
+				};
+			};
+		};
+	};
+
 	saradc: adc@fec10000 {
 		compatible = "rockchip,rk3588-saradc";
 		reg = <0x0 0xfec10000 0x0 0x10000>;