diff mbox series

[v4,4/5] arm64: dts: sdm845: Add OPP tables and power-domains for venus

Message ID 1595503612-2901-5-git-send-email-rnayak@codeaurora.org (mailing list archive)
State Superseded
Headers show
Series DVFS support for Venus | expand

Commit Message

Rajendra Nayak July 23, 2020, 11:26 a.m. UTC
Add the OPP tables in order to be able to vote on the performance state of
a power-domain.

Signed-off-by: Rajendra Nayak <rnayak@codeaurora.org>
---
 arch/arm64/boot/dts/qcom/sdm845.dtsi | 40 ++++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

Comments

Stanimir Varbanov July 23, 2020, 6:06 p.m. UTC | #1
Hi Rajendra,

After applying 2,3 and 4/5 patches on linaro-integration v5.8-rc2 I see
below messages on db845:

qcom-venus aa00000.video-codec: dev_pm_opp_set_rate: failed to find
current OPP for freq 533000097 (-34)

^^^ This one is new.

qcom_rpmh TCS Busy, retrying RPMH message send: addr=0x30000

^^^ and this message is annoying, can we make it pr_debug in rpmh?

On 7/23/20 2:26 PM, Rajendra Nayak wrote:
> Add the OPP tables in order to be able to vote on the performance state of
> a power-domain.
> 
> Signed-off-by: Rajendra Nayak <rnayak@codeaurora.org>
> ---
>  arch/arm64/boot/dts/qcom/sdm845.dtsi | 40 ++++++++++++++++++++++++++++++++++--
>  1 file changed, 38 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi
> index e506793..5ca2265 100644
> --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
> +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
> @@ -3631,8 +3631,10 @@
>  			interrupts = <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>;
>  			power-domains = <&videocc VENUS_GDSC>,
>  					<&videocc VCODEC0_GDSC>,
> -					<&videocc VCODEC1_GDSC>;
> -			power-domain-names = "venus", "vcodec0", "vcodec1";
> +					<&videocc VCODEC1_GDSC>,
> +					<&rpmhpd SDM845_CX>;
> +			power-domain-names = "venus", "vcodec0", "vcodec1", "cx";
> +			operating-points-v2 = <&venus_opp_table>;
>  			clocks = <&videocc VIDEO_CC_VENUS_CTL_CORE_CLK>,
>  				 <&videocc VIDEO_CC_VENUS_AHB_CLK>,
>  				 <&videocc VIDEO_CC_VENUS_CTL_AXI_CLK>,
> @@ -3654,6 +3656,40 @@
>  			video-core1 {
>  				compatible = "venus-encoder";
>  			};
> +
> +			venus_opp_table: venus-opp-table {
> +				compatible = "operating-points-v2";
> +
> +				opp-100000000 {
> +					opp-hz = /bits/ 64 <100000000>;
> +					required-opps = <&rpmhpd_opp_min_svs>;
> +				};
> +
> +				opp-200000000 {
> +					opp-hz = /bits/ 64 <200000000>;
> +					required-opps = <&rpmhpd_opp_low_svs>;
> +				};
> +
> +				opp-320000000 {
> +					opp-hz = /bits/ 64 <320000000>;
> +					required-opps = <&rpmhpd_opp_svs>;
> +				};
> +
> +				opp-380000000 {
> +					opp-hz = /bits/ 64 <380000000>;
> +					required-opps = <&rpmhpd_opp_svs_l1>;
> +				};
> +
> +				opp-444000000 {
> +					opp-hz = /bits/ 64 <444000000>;
> +					required-opps = <&rpmhpd_opp_nom>;
> +				};
> +
> +				opp-533000000 {
> +					opp-hz = /bits/ 64 <533000000>;
> +					required-opps = <&rpmhpd_opp_turbo>;
> +				};
> +			};
>  		};
>  
>  		videocc: clock-controller@ab00000 {
>
Rajendra Nayak July 24, 2020, 8:49 a.m. UTC | #2
Hey Stan,

On 7/23/2020 11:36 PM, Stanimir Varbanov wrote:
> Hi Rajendra,
> 
> After applying 2,3 and 4/5 patches on linaro-integration v5.8-rc2 I see
> below messages on db845:
> 
> qcom-venus aa00000.video-codec: dev_pm_opp_set_rate: failed to find
> current OPP for freq 533000097 (-34)
> 
> ^^^ This one is new.

I was hoping to be able to reproduce this on the 845 mtp (I don't have
a db845), but I can;t seem to get venus working on mainline [1]
(neither with linux-next, nor with linaro integration)
Do you know if I might be missing some fix?

> 
> qcom_rpmh TCS Busy, retrying RPMH message send: addr=0x30000
> 
> ^^^ and this message is annoying, can we make it pr_debug in rpmh?

Sure, I'll send a patch for that and see what the rpmh owners have to say.

[1]

[    1.632147] qcom-venus aa00000.video-codec: Adding to iommu group 2
[    1.638920] qcom-venus aa00000.video-codec: non legacy binding
[    1.648313] ------------[ cut here ]------------
[    1.652976] video_cc_venus_ctl_axi_clk status stuck at 'off'
[    1.653068] WARNING: CPU: 7 PID: 1 at drivers/clk/qcom/clk-branch.c:92 clk_b8
[    1.667977] Modules linked in:
[    1.671076] CPU: 7 PID: 1 Comm: swapper/0 Not tainted 5.8.0-rc6-00254-gc43551
[    1.678704] Hardware name: Qualcomm Technologies, Inc. SDM845 MTP (DT)
[    1.685294] pstate: 60c00085 (nZCv daIf +PAN +UAO BTYPE=--)
[    1.690911] pc : clk_branch_toggle+0x14c/0x168
[    1.695397] lr : clk_branch_toggle+0x14c/0x168
[    1.699881] sp : ffff80001005b900
[    1.703232] x29: ffff80001005b900 x28: ffffb58586ac2f38
[    1.708594] x27: ffff0000f86c6d48 x26: ffffb58586f09000
[    1.713953] x25: ffffb585867c0bd8 x24: 0000000000000000
[    1.719312] x23: ffffb5858702df28 x22: ffffb58585a3c6a8
[    1.724672] x21: 0000000000000001 x20: ffffb58586f09000
[    1.730031] x19: 0000000000000000 x18: ffffb58586f09948
[    1.735390] x17: 0000000000000001 x16: 0000000000000019
[    1.740749] x15: ffff80009005b5a7 x14: 0000000000000006
[    1.746107] x13: ffff80001005b5b5 x12: ffffb58586f21d68
[    1.751466] x11: 0000000000000000 x10: 0000000005f5e0ff
[    1.756825] x9 : ffff80001005b900 x8 : 2766666f27207461
[    1.762184] x7 : 206b637574732073 x6 : ffffb58587141848
[    1.767543] x5 : 0000000000000000 x4 : 0000000000000000
[    1.772902] x3 : 00000000ffffffff x2 : ffff4a7b76cd8000
[    1.778261] x1 : ad405f90446fcf00 x0 : 0000000000000000
[    1.783624] Call trace:
[    1.786103]  clk_branch_toggle+0x14c/0x168
[    1.790241]  clk_branch2_enable+0x18/0x20
[    1.794306]  clk_core_enable+0x60/0xa8
[    1.798090]  clk_core_enable_lock+0x20/0x40
[    1.802316]  clk_enable+0x14/0x28
[    1.805682]  core_clks_enable+0x94/0xd8
[    1.809562]  core_power_v4+0x48/0x50
[    1.813178]  venus_runtime_resume+0x24/0x40
[    1.817417]  pm_generic_runtime_resume+0x28/0x40
[    1.822079]  __rpm_callback+0xa0/0x138
[    1.825861]  rpm_callback+0x24/0x98
[    1.829390]  rpm_resume+0x32c/0x490
[    1.832917]  __pm_runtime_resume+0x38/0x88
[    1.837051]  venus_probe+0x1f0/0x34c
[    1.840667]  platform_drv_probe+0x4c/0xa8
[    1.844727]  really_probe+0x100/0x388
[    1.848428]  driver_probe_device+0x54/0xb8
[    1.852563]  device_driver_attach+0x6c/0x78
[    1.856790]  __driver_attach+0xb0/0xf0
[    1.860576]  bus_for_each_dev+0x68/0xc8
[    1.864456]  driver_attach+0x20/0x28
[    1.868064]  bus_add_driver+0x148/0x200
[    1.871941]  driver_register+0x60/0x110
[    1.875819]  __platform_driver_register+0x44/0x50
[    1.880576]  qcom_venus_driver_init+0x18/0x20
[    1.884990]  do_one_initcall+0x58/0x1a0
[    1.888878]  kernel_init_freeable+0x1fc/0x28c
[    1.893292]  kernel_init+0x10/0x108
[    1.896821]  ret_from_fork+0x10/0x1c
[    1.900441] ---[ end trace f12a7e5e182f3e4e ]---
[    1.906415] qcom-venus: probe of aa00000.video-codec failed with error -16

> 
> On 7/23/20 2:26 PM, Rajendra Nayak wrote:
>> Add the OPP tables in order to be able to vote on the performance state of
>> a power-domain.
>>
>> Signed-off-by: Rajendra Nayak <rnayak@codeaurora.org>
>> ---
>>   arch/arm64/boot/dts/qcom/sdm845.dtsi | 40 ++++++++++++++++++++++++++++++++++--
>>   1 file changed, 38 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi
>> index e506793..5ca2265 100644
>> --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
>> +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
>> @@ -3631,8 +3631,10 @@
>>   			interrupts = <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>;
>>   			power-domains = <&videocc VENUS_GDSC>,
>>   					<&videocc VCODEC0_GDSC>,
>> -					<&videocc VCODEC1_GDSC>;
>> -			power-domain-names = "venus", "vcodec0", "vcodec1";
>> +					<&videocc VCODEC1_GDSC>,
>> +					<&rpmhpd SDM845_CX>;
>> +			power-domain-names = "venus", "vcodec0", "vcodec1", "cx";
>> +			operating-points-v2 = <&venus_opp_table>;
>>   			clocks = <&videocc VIDEO_CC_VENUS_CTL_CORE_CLK>,
>>   				 <&videocc VIDEO_CC_VENUS_AHB_CLK>,
>>   				 <&videocc VIDEO_CC_VENUS_CTL_AXI_CLK>,
>> @@ -3654,6 +3656,40 @@
>>   			video-core1 {
>>   				compatible = "venus-encoder";
>>   			};
>> +
>> +			venus_opp_table: venus-opp-table {
>> +				compatible = "operating-points-v2";
>> +
>> +				opp-100000000 {
>> +					opp-hz = /bits/ 64 <100000000>;
>> +					required-opps = <&rpmhpd_opp_min_svs>;
>> +				};
>> +
>> +				opp-200000000 {
>> +					opp-hz = /bits/ 64 <200000000>;
>> +					required-opps = <&rpmhpd_opp_low_svs>;
>> +				};
>> +
>> +				opp-320000000 {
>> +					opp-hz = /bits/ 64 <320000000>;
>> +					required-opps = <&rpmhpd_opp_svs>;
>> +				};
>> +
>> +				opp-380000000 {
>> +					opp-hz = /bits/ 64 <380000000>;
>> +					required-opps = <&rpmhpd_opp_svs_l1>;
>> +				};
>> +
>> +				opp-444000000 {
>> +					opp-hz = /bits/ 64 <444000000>;
>> +					required-opps = <&rpmhpd_opp_nom>;
>> +				};
>> +
>> +				opp-533000000 {
>> +					opp-hz = /bits/ 64 <533000000>;
>> +					required-opps = <&rpmhpd_opp_turbo>;
>> +				};
>> +			};
>>   		};
>>   
>>   		videocc: clock-controller@ab00000 {
>>
>
Rajendra Nayak July 24, 2020, 9:02 a.m. UTC | #3
Hi Maulik/Lina,

On 7/23/2020 11:36 PM, Stanimir Varbanov wrote:
> Hi Rajendra,
> 
> After applying 2,3 and 4/5 patches on linaro-integration v5.8-rc2 I see
> below messages on db845:
> 
> qcom-venus aa00000.video-codec: dev_pm_opp_set_rate: failed to find
> current OPP for freq 533000097 (-34)
> 
> ^^^ This one is new.
> 
> qcom_rpmh TCS Busy, retrying RPMH message send: addr=0x30000
> 
> ^^^ and this message is annoying, can we make it pr_debug in rpmh?

Would you be fine with moving this message to a pr_debug? Its currently
a pr_info_ratelimited()

thanks,
Rajendra
Stanimir Varbanov July 24, 2020, 10:16 a.m. UTC | #4
Hi,

On 7/24/20 11:49 AM, Rajendra Nayak wrote:
> Hey Stan,
> 
> On 7/23/2020 11:36 PM, Stanimir Varbanov wrote:
>> Hi Rajendra,
>>
>> After applying 2,3 and 4/5 patches on linaro-integration v5.8-rc2 I see
>> below messages on db845:
>>
>> qcom-venus aa00000.video-codec: dev_pm_opp_set_rate: failed to find
>> current OPP for freq 533000097 (-34)
>>
>> ^^^ This one is new.
> 
> I was hoping to be able to reproduce this on the 845 mtp (I don't have
> a db845), but I can;t seem to get venus working on mainline [1]
> (neither with linux-next, nor with linaro integration)

The driver should at least load without errors on -next and
linaro-integration for db845. As to MTP and haven't checked because I
don't have such board with me.

I will try to debug dev_pm_opp_set_rate -ERANGE error.

> Do you know if I might be missing some fix?
> 
>>
>> qcom_rpmh TCS Busy, retrying RPMH message send: addr=0x30000
>>
>> ^^^ and this message is annoying, can we make it pr_debug in rpmh?
> 
> Sure, I'll send a patch for that and see what the rpmh owners have to say.
> 
> [1]
> 
> [    1.632147] qcom-venus aa00000.video-codec: Adding to iommu group 2
> [    1.638920] qcom-venus aa00000.video-codec: non legacy binding
> [    1.648313] ------------[ cut here ]------------
> [    1.652976] video_cc_venus_ctl_axi_clk status stuck at 'off'

I guess this means that venus_gdsc is not powered on.

> [    1.653068] WARNING: CPU: 7 PID: 1 at
> drivers/clk/qcom/clk-branch.c:92 clk_b8
> [    1.667977] Modules linked in:
> [    1.671076] CPU: 7 PID: 1 Comm: swapper/0 Not tainted
> 5.8.0-rc6-00254-gc43551
> [    1.678704] Hardware name: Qualcomm Technologies, Inc. SDM845 MTP (DT)
> [    1.685294] pstate: 60c00085 (nZCv daIf +PAN +UAO BTYPE=--)
> [    1.690911] pc : clk_branch_toggle+0x14c/0x168
> [    1.695397] lr : clk_branch_toggle+0x14c/0x168
> [    1.699881] sp : ffff80001005b900
> [    1.703232] x29: ffff80001005b900 x28: ffffb58586ac2f38
> [    1.708594] x27: ffff0000f86c6d48 x26: ffffb58586f09000
> [    1.713953] x25: ffffb585867c0bd8 x24: 0000000000000000
> [    1.719312] x23: ffffb5858702df28 x22: ffffb58585a3c6a8
> [    1.724672] x21: 0000000000000001 x20: ffffb58586f09000
> [    1.730031] x19: 0000000000000000 x18: ffffb58586f09948
> [    1.735390] x17: 0000000000000001 x16: 0000000000000019
> [    1.740749] x15: ffff80009005b5a7 x14: 0000000000000006
> [    1.746107] x13: ffff80001005b5b5 x12: ffffb58586f21d68
> [    1.751466] x11: 0000000000000000 x10: 0000000005f5e0ff
> [    1.756825] x9 : ffff80001005b900 x8 : 2766666f27207461
> [    1.762184] x7 : 206b637574732073 x6 : ffffb58587141848
> [    1.767543] x5 : 0000000000000000 x4 : 0000000000000000
> [    1.772902] x3 : 00000000ffffffff x2 : ffff4a7b76cd8000
> [    1.778261] x1 : ad405f90446fcf00 x0 : 0000000000000000
> [    1.783624] Call trace:
> [    1.786103]  clk_branch_toggle+0x14c/0x168
> [    1.790241]  clk_branch2_enable+0x18/0x20
> [    1.794306]  clk_core_enable+0x60/0xa8
> [    1.798090]  clk_core_enable_lock+0x20/0x40
> [    1.802316]  clk_enable+0x14/0x28
> [    1.805682]  core_clks_enable+0x94/0xd8
> [    1.809562]  core_power_v4+0x48/0x50
> [    1.813178]  venus_runtime_resume+0x24/0x40
> [    1.817417]  pm_generic_runtime_resume+0x28/0x40
> [    1.822079]  __rpm_callback+0xa0/0x138
> [    1.825861]  rpm_callback+0x24/0x98
> [    1.829390]  rpm_resume+0x32c/0x490
> [    1.832917]  __pm_runtime_resume+0x38/0x88
> [    1.837051]  venus_probe+0x1f0/0x34c
> [    1.840667]  platform_drv_probe+0x4c/0xa8
> [    1.844727]  really_probe+0x100/0x388
> [    1.848428]  driver_probe_device+0x54/0xb8
> [    1.852563]  device_driver_attach+0x6c/0x78
> [    1.856790]  __driver_attach+0xb0/0xf0
> [    1.860576]  bus_for_each_dev+0x68/0xc8
> [    1.864456]  driver_attach+0x20/0x28
> [    1.868064]  bus_add_driver+0x148/0x200
> [    1.871941]  driver_register+0x60/0x110
> [    1.875819]  __platform_driver_register+0x44/0x50
> [    1.880576]  qcom_venus_driver_init+0x18/0x20
> [    1.884990]  do_one_initcall+0x58/0x1a0
> [    1.888878]  kernel_init_freeable+0x1fc/0x28c
> [    1.893292]  kernel_init+0x10/0x108
> [    1.896821]  ret_from_fork+0x10/0x1c
> [    1.900441] ---[ end trace f12a7e5e182f3e4e ]---
> [    1.906415] qcom-venus: probe of aa00000.video-codec failed with
> error -16
> 
>>
>> On 7/23/20 2:26 PM, Rajendra Nayak wrote:
>>> Add the OPP tables in order to be able to vote on the performance
>>> state of
>>> a power-domain.
>>>
>>> Signed-off-by: Rajendra Nayak <rnayak@codeaurora.org>
>>> ---
>>>   arch/arm64/boot/dts/qcom/sdm845.dtsi | 40
>>> ++++++++++++++++++++++++++++++++++--
>>>   1 file changed, 38 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi
>>> b/arch/arm64/boot/dts/qcom/sdm845.dtsi
>>> index e506793..5ca2265 100644
>>> --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
>>> +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
>>> @@ -3631,8 +3631,10 @@
>>>               interrupts = <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>;
>>>               power-domains = <&videocc VENUS_GDSC>,
>>>                       <&videocc VCODEC0_GDSC>,
>>> -                    <&videocc VCODEC1_GDSC>;
>>> -            power-domain-names = "venus", "vcodec0", "vcodec1";
>>> +                    <&videocc VCODEC1_GDSC>,
>>> +                    <&rpmhpd SDM845_CX>;
>>> +            power-domain-names = "venus", "vcodec0", "vcodec1", "cx";
>>> +            operating-points-v2 = <&venus_opp_table>;
>>>               clocks = <&videocc VIDEO_CC_VENUS_CTL_CORE_CLK>,
>>>                    <&videocc VIDEO_CC_VENUS_AHB_CLK>,
>>>                    <&videocc VIDEO_CC_VENUS_CTL_AXI_CLK>,
>>> @@ -3654,6 +3656,40 @@
>>>               video-core1 {
>>>                   compatible = "venus-encoder";
>>>               };
>>> +
>>> +            venus_opp_table: venus-opp-table {
>>> +                compatible = "operating-points-v2";
>>> +
>>> +                opp-100000000 {
>>> +                    opp-hz = /bits/ 64 <100000000>;
>>> +                    required-opps = <&rpmhpd_opp_min_svs>;
>>> +                };
>>> +
>>> +                opp-200000000 {
>>> +                    opp-hz = /bits/ 64 <200000000>;
>>> +                    required-opps = <&rpmhpd_opp_low_svs>;
>>> +                };
>>> +
>>> +                opp-320000000 {
>>> +                    opp-hz = /bits/ 64 <320000000>;
>>> +                    required-opps = <&rpmhpd_opp_svs>;
>>> +                };
>>> +
>>> +                opp-380000000 {
>>> +                    opp-hz = /bits/ 64 <380000000>;
>>> +                    required-opps = <&rpmhpd_opp_svs_l1>;
>>> +                };
>>> +
>>> +                opp-444000000 {
>>> +                    opp-hz = /bits/ 64 <444000000>;
>>> +                    required-opps = <&rpmhpd_opp_nom>;
>>> +                };
>>> +
>>> +                opp-533000000 {
>>> +                    opp-hz = /bits/ 64 <533000000>;
>>> +                    required-opps = <&rpmhpd_opp_turbo>;
>>> +                };
>>> +            };
>>>           };
>>>             videocc: clock-controller@ab00000 {
>>>
>>
>
Stanimir Varbanov July 24, 2020, 2:09 p.m. UTC | #5
Hi,

On 7/23/20 9:06 PM, Stanimir Varbanov wrote:
> Hi Rajendra,
> 
> After applying 2,3 and 4/5 patches on linaro-integration v5.8-rc2 I see
> below messages on db845:
> 
> qcom-venus aa00000.video-codec: dev_pm_opp_set_rate: failed to find
> current OPP for freq 533000097 (-34)
> 
> ^^^ This one is new.
> 
> qcom_rpmh TCS Busy, retrying RPMH message send: addr=0x30000
> 
> ^^^ and this message is annoying, can we make it pr_debug in rpmh?
> 
> On 7/23/20 2:26 PM, Rajendra Nayak wrote:
>> Add the OPP tables in order to be able to vote on the performance state of
>> a power-domain.
>>
>> Signed-off-by: Rajendra Nayak <rnayak@codeaurora.org>
>> ---
>>  arch/arm64/boot/dts/qcom/sdm845.dtsi | 40 ++++++++++++++++++++++++++++++++++--
>>  1 file changed, 38 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi
>> index e506793..5ca2265 100644
>> --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
>> +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
>> @@ -3631,8 +3631,10 @@
>>  			interrupts = <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>;
>>  			power-domains = <&videocc VENUS_GDSC>,
>>  					<&videocc VCODEC0_GDSC>,
>> -					<&videocc VCODEC1_GDSC>;
>> -			power-domain-names = "venus", "vcodec0", "vcodec1";
>> +					<&videocc VCODEC1_GDSC>,
>> +					<&rpmhpd SDM845_CX>;
>> +			power-domain-names = "venus", "vcodec0", "vcodec1", "cx";
>> +			operating-points-v2 = <&venus_opp_table>;
>>  			clocks = <&videocc VIDEO_CC_VENUS_CTL_CORE_CLK>,
>>  				 <&videocc VIDEO_CC_VENUS_AHB_CLK>,
>>  				 <&videocc VIDEO_CC_VENUS_CTL_AXI_CLK>,
>> @@ -3654,6 +3656,40 @@
>>  			video-core1 {
>>  				compatible = "venus-encoder";
>>  			};
>> +
>> +			venus_opp_table: venus-opp-table {
>> +				compatible = "operating-points-v2";
>> +
>> +				opp-100000000 {
>> +					opp-hz = /bits/ 64 <100000000>;
>> +					required-opps = <&rpmhpd_opp_min_svs>;
>> +				};
>> +
>> +				opp-200000000 {
>> +					opp-hz = /bits/ 64 <200000000>;
>> +					required-opps = <&rpmhpd_opp_low_svs>;
>> +				};
>> +
>> +				opp-320000000 {
>> +					opp-hz = /bits/ 64 <320000000>;
>> +					required-opps = <&rpmhpd_opp_svs>;
>> +				};
>> +
>> +				opp-380000000 {
>> +					opp-hz = /bits/ 64 <380000000>;
>> +					required-opps = <&rpmhpd_opp_svs_l1>;
>> +				};
>> +
>> +				opp-444000000 {
>> +					opp-hz = /bits/ 64 <444000000>;
>> +					required-opps = <&rpmhpd_opp_nom>;
>> +				};
>> +
>> +				opp-533000000 {
>> +					opp-hz = /bits/ 64 <533000000>;

Actually it comes from videocc, where ftbl_video_cc_venus_clk_src
defines 533000000 but the real calculated freq is 533000097.

If I change to opp-hz = /bits/ 64 <533000097> the error disappear.

I guess we have to revisit m/n and/or pre-divider for this freq when the
source pll is P_VIDEO_PLL0_OUT_MAIN PLL?

>> +					required-opps = <&rpmhpd_opp_turbo>;
>> +				};
>> +			};
>>  		};
>>  
>>  		videocc: clock-controller@ab00000 {
>>
>
Lina Iyer July 24, 2020, 4:28 p.m. UTC | #6
On Fri, Jul 24 2020 at 03:03 -0600, Rajendra Nayak wrote:
>Hi Maulik/Lina,
>
>On 7/23/2020 11:36 PM, Stanimir Varbanov wrote:
>>Hi Rajendra,
>>
>>After applying 2,3 and 4/5 patches on linaro-integration v5.8-rc2 I see
>>below messages on db845:
>>
>>qcom-venus aa00000.video-codec: dev_pm_opp_set_rate: failed to find
>>current OPP for freq 533000097 (-34)
>>
>>^^^ This one is new.
>>
>>qcom_rpmh TCS Busy, retrying RPMH message send: addr=0x30000
>>
>>^^^ and this message is annoying, can we make it pr_debug in rpmh?
>
How annoyingly often do you see this message?
Usually, this is an indication of bad system state either on remote
processors in the SoC or in Linux itself. On a smooth sailing build you
should not see this 'warning'.

>Would you be fine with moving this message to a pr_debug? Its currently
>a pr_info_ratelimited()
I would rather not, moving this out of sight will mask a lot serious
issues that otherwise bring attention to the developers.

--Lina
Stanimir Varbanov July 24, 2020, 4:52 p.m. UTC | #7
Hi Lina,

On 7/24/20 7:28 PM, Lina Iyer wrote:
> On Fri, Jul 24 2020 at 03:03 -0600, Rajendra Nayak wrote:
>> Hi Maulik/Lina,
>>
>> On 7/23/2020 11:36 PM, Stanimir Varbanov wrote:
>>> Hi Rajendra,
>>>
>>> After applying 2,3 and 4/5 patches on linaro-integration v5.8-rc2 I see
>>> below messages on db845:
>>>
>>> qcom-venus aa00000.video-codec: dev_pm_opp_set_rate: failed to find
>>> current OPP for freq 533000097 (-34)
>>>
>>> ^^^ This one is new.
>>>
>>> qcom_rpmh TCS Busy, retrying RPMH message send: addr=0x30000
>>>
>>> ^^^ and this message is annoying, can we make it pr_debug in rpmh?
>>
> How annoyingly often do you see this message?

I haven't gig deeply but on every driver pm_runtime_suspend (after
applying Rajendra's patches). And I guess it comes after a call to
dev_pm_opp_set_rate(dev, 0).

IMO this is too often.

> Usually, this is an indication of bad system state either on remote
> processors in the SoC or in Linux itself. On a smooth sailing build you
> should not see this 'warning'.
> 
>> Would you be fine with moving this message to a pr_debug? Its currently
>> a pr_info_ratelimited()
> I would rather not, moving this out of sight will mask a lot serious
> issues that otherwise bring attention to the developers.
> 
> --Lina
Stanimir Varbanov July 24, 2020, 5 p.m. UTC | #8
On 7/24/20 7:52 PM, Stanimir Varbanov wrote:
> Hi Lina,
> 
> On 7/24/20 7:28 PM, Lina Iyer wrote:
>> On Fri, Jul 24 2020 at 03:03 -0600, Rajendra Nayak wrote:
>>> Hi Maulik/Lina,
>>>
>>> On 7/23/2020 11:36 PM, Stanimir Varbanov wrote:
>>>> Hi Rajendra,
>>>>
>>>> After applying 2,3 and 4/5 patches on linaro-integration v5.8-rc2 I see
>>>> below messages on db845:
>>>>
>>>> qcom-venus aa00000.video-codec: dev_pm_opp_set_rate: failed to find
>>>> current OPP for freq 533000097 (-34)
>>>>
>>>> ^^^ This one is new.
>>>>
>>>> qcom_rpmh TCS Busy, retrying RPMH message send: addr=0x30000
>>>>
>>>> ^^^ and this message is annoying, can we make it pr_debug in rpmh?
>>>
>> How annoyingly often do you see this message?
> 
> I haven't gig deeply but on every driver pm_runtime_suspend (after
> applying Rajendra's patches). And I guess it comes after a call to
> dev_pm_opp_set_rate(dev, 0).

Or it might be when the driver is switching off opp_pmdomain.

> 
> IMO this is too often.
> 
>> Usually, this is an indication of bad system state either on remote
>> processors in the SoC or in Linux itself. On a smooth sailing build you
>> should not see this 'warning'.
>>
>>> Would you be fine with moving this message to a pr_debug? Its currently
>>> a pr_info_ratelimited()
>> I would rather not, moving this out of sight will mask a lot serious
>> issues that otherwise bring attention to the developers.
>>
>> --Lina
>
Rajendra Nayak July 27, 2020, 5:53 a.m. UTC | #9
On 7/24/2020 7:39 PM, Stanimir Varbanov wrote:
> Hi,
> 
> On 7/23/20 9:06 PM, Stanimir Varbanov wrote:
>> Hi Rajendra,
>>
>> After applying 2,3 and 4/5 patches on linaro-integration v5.8-rc2 I see
>> below messages on db845:
>>
>> qcom-venus aa00000.video-codec: dev_pm_opp_set_rate: failed to find
>> current OPP for freq 533000097 (-34)
>>
>> ^^^ This one is new.
>>
>> qcom_rpmh TCS Busy, retrying RPMH message send: addr=0x30000
>>
>> ^^^ and this message is annoying, can we make it pr_debug in rpmh?
>>
>> On 7/23/20 2:26 PM, Rajendra Nayak wrote:
>>> Add the OPP tables in order to be able to vote on the performance state of
>>> a power-domain.
>>>
>>> Signed-off-by: Rajendra Nayak <rnayak@codeaurora.org>
>>> ---
>>>   arch/arm64/boot/dts/qcom/sdm845.dtsi | 40 ++++++++++++++++++++++++++++++++++--
>>>   1 file changed, 38 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi
>>> index e506793..5ca2265 100644
>>> --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
>>> +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
>>> @@ -3631,8 +3631,10 @@
>>>   			interrupts = <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>;
>>>   			power-domains = <&videocc VENUS_GDSC>,
>>>   					<&videocc VCODEC0_GDSC>,
>>> -					<&videocc VCODEC1_GDSC>;
>>> -			power-domain-names = "venus", "vcodec0", "vcodec1";
>>> +					<&videocc VCODEC1_GDSC>,
>>> +					<&rpmhpd SDM845_CX>;
>>> +			power-domain-names = "venus", "vcodec0", "vcodec1", "cx";
>>> +			operating-points-v2 = <&venus_opp_table>;
>>>   			clocks = <&videocc VIDEO_CC_VENUS_CTL_CORE_CLK>,
>>>   				 <&videocc VIDEO_CC_VENUS_AHB_CLK>,
>>>   				 <&videocc VIDEO_CC_VENUS_CTL_AXI_CLK>,
>>> @@ -3654,6 +3656,40 @@
>>>   			video-core1 {
>>>   				compatible = "venus-encoder";
>>>   			};
>>> +
>>> +			venus_opp_table: venus-opp-table {
>>> +				compatible = "operating-points-v2";
>>> +
>>> +				opp-100000000 {
>>> +					opp-hz = /bits/ 64 <100000000>;
>>> +					required-opps = <&rpmhpd_opp_min_svs>;
>>> +				};
>>> +
>>> +				opp-200000000 {
>>> +					opp-hz = /bits/ 64 <200000000>;
>>> +					required-opps = <&rpmhpd_opp_low_svs>;
>>> +				};
>>> +
>>> +				opp-320000000 {
>>> +					opp-hz = /bits/ 64 <320000000>;
>>> +					required-opps = <&rpmhpd_opp_svs>;
>>> +				};
>>> +
>>> +				opp-380000000 {
>>> +					opp-hz = /bits/ 64 <380000000>;
>>> +					required-opps = <&rpmhpd_opp_svs_l1>;
>>> +				};
>>> +
>>> +				opp-444000000 {
>>> +					opp-hz = /bits/ 64 <444000000>;
>>> +					required-opps = <&rpmhpd_opp_nom>;
>>> +				};
>>> +
>>> +				opp-533000000 {
>>> +					opp-hz = /bits/ 64 <533000000>;
> 
> Actually it comes from videocc, where ftbl_video_cc_venus_clk_src
> defines 533000000 but the real calculated freq is 533000097.

I still don't quite understand why the videocc driver returns this
frequency despite this not being in the freq table.
I would expect a clk_round_rate() when called with 533000097 to return
a 533000000.

Taniya, Do you know why?

> 
> If I change to opp-hz = /bits/ 64 <533000097> the error disappear.
> 
> I guess we have to revisit m/n and/or pre-divider for this freq when the
> source pll is P_VIDEO_PLL0_OUT_MAIN PLL?
> 
>>> +					required-opps = <&rpmhpd_opp_turbo>;
>>> +				};
>>> +			};
>>>   		};
>>>   
>>>   		videocc: clock-controller@ab00000 {
>>>
>>
>
Rajendra Nayak July 27, 2020, 12:08 p.m. UTC | #10
On 7/27/2020 11:23 AM, Rajendra Nayak wrote:
> 
> 
> On 7/24/2020 7:39 PM, Stanimir Varbanov wrote:
>> Hi,
>>
>> On 7/23/20 9:06 PM, Stanimir Varbanov wrote:
>>> Hi Rajendra,
>>>
>>> After applying 2,3 and 4/5 patches on linaro-integration v5.8-rc2 I see
>>> below messages on db845:
>>>
>>> qcom-venus aa00000.video-codec: dev_pm_opp_set_rate: failed to find
>>> current OPP for freq 533000097 (-34)
>>>
>>> ^^^ This one is new.
>>>
>>> qcom_rpmh TCS Busy, retrying RPMH message send: addr=0x30000
>>>
>>> ^^^ and this message is annoying, can we make it pr_debug in rpmh?
>>>
>>> On 7/23/20 2:26 PM, Rajendra Nayak wrote:
>>>> Add the OPP tables in order to be able to vote on the performance state of
>>>> a power-domain.
>>>>
>>>> Signed-off-by: Rajendra Nayak <rnayak@codeaurora.org>
>>>> ---
>>>>   arch/arm64/boot/dts/qcom/sdm845.dtsi | 40 ++++++++++++++++++++++++++++++++++--
>>>>   1 file changed, 38 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi
>>>> index e506793..5ca2265 100644
>>>> --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
>>>> +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
>>>> @@ -3631,8 +3631,10 @@
>>>>               interrupts = <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>;
>>>>               power-domains = <&videocc VENUS_GDSC>,
>>>>                       <&videocc VCODEC0_GDSC>,
>>>> -                    <&videocc VCODEC1_GDSC>;
>>>> -            power-domain-names = "venus", "vcodec0", "vcodec1";
>>>> +                    <&videocc VCODEC1_GDSC>,
>>>> +                    <&rpmhpd SDM845_CX>;
>>>> +            power-domain-names = "venus", "vcodec0", "vcodec1", "cx";
>>>> +            operating-points-v2 = <&venus_opp_table>;
>>>>               clocks = <&videocc VIDEO_CC_VENUS_CTL_CORE_CLK>,
>>>>                    <&videocc VIDEO_CC_VENUS_AHB_CLK>,
>>>>                    <&videocc VIDEO_CC_VENUS_CTL_AXI_CLK>,
>>>> @@ -3654,6 +3656,40 @@
>>>>               video-core1 {
>>>>                   compatible = "venus-encoder";
>>>>               };
>>>> +
>>>> +            venus_opp_table: venus-opp-table {
>>>> +                compatible = "operating-points-v2";
>>>> +
>>>> +                opp-100000000 {
>>>> +                    opp-hz = /bits/ 64 <100000000>;
>>>> +                    required-opps = <&rpmhpd_opp_min_svs>;
>>>> +                };
>>>> +
>>>> +                opp-200000000 {
>>>> +                    opp-hz = /bits/ 64 <200000000>;
>>>> +                    required-opps = <&rpmhpd_opp_low_svs>;
>>>> +                };
>>>> +
>>>> +                opp-320000000 {
>>>> +                    opp-hz = /bits/ 64 <320000000>;
>>>> +                    required-opps = <&rpmhpd_opp_svs>;
>>>> +                };
>>>> +
>>>> +                opp-380000000 {
>>>> +                    opp-hz = /bits/ 64 <380000000>;
>>>> +                    required-opps = <&rpmhpd_opp_svs_l1>;
>>>> +                };
>>>> +
>>>> +                opp-444000000 {
>>>> +                    opp-hz = /bits/ 64 <444000000>;
>>>> +                    required-opps = <&rpmhpd_opp_nom>;
>>>> +                };
>>>> +
>>>> +                opp-533000000 {
>>>> +                    opp-hz = /bits/ 64 <533000000>;
>>
>> Actually it comes from videocc, where ftbl_video_cc_venus_clk_src
>> defines 533000000 but the real calculated freq is 533000097.
> 
> I still don't quite understand why the videocc driver returns this
> frequency despite this not being in the freq table.

Ok, so I see the same issue on sc7180 also. clk_round_rate() does seem to
return whats in the freq table, but clk_set_rate() goes ahead and sets it
to 533000097. Subsequently when we try to set a different OPP, it fails to
find the 'current' OPP entry for 533000097. This sounds like an issue with the OPP
framework? Should we not fall back to the highest OPP as the current OPP?

Stephen/Viresh, any thoughts?

> I would expect a clk_round_rate() when called with 533000097 to return
> a 533000000.
> 
> Taniya, Do you know why?
> 
>>
>> If I change to opp-hz = /bits/ 64 <533000097> the error disappear.
>>
>> I guess we have to revisit m/n and/or pre-divider for this freq when the
>> source pll is P_VIDEO_PLL0_OUT_MAIN PLL?
>>
>>>> +                    required-opps = <&rpmhpd_opp_turbo>;
>>>> +                };
>>>> +            };
>>>>           };
>>>>           videocc: clock-controller@ab00000 {
>>>>
>>>
>>
>
Viresh Kumar July 27, 2020, 3:38 p.m. UTC | #11
On 27-07-20, 17:38, Rajendra Nayak wrote:
> 
> On 7/27/2020 11:23 AM, Rajendra Nayak wrote:
> > 
> > 
> > On 7/24/2020 7:39 PM, Stanimir Varbanov wrote:
> > > Hi,
> > > 
> > > On 7/23/20 9:06 PM, Stanimir Varbanov wrote:
> > > > Hi Rajendra,
> > > > 
> > > > After applying 2,3 and 4/5 patches on linaro-integration v5.8-rc2 I see
> > > > below messages on db845:
> > > > 
> > > > qcom-venus aa00000.video-codec: dev_pm_opp_set_rate: failed to find
> > > > current OPP for freq 533000097 (-34)
> > > > 
> > > > ^^^ This one is new.
> > > > 
> > > > qcom_rpmh TCS Busy, retrying RPMH message send: addr=0x30000
> > > > 
> > > > ^^^ and this message is annoying, can we make it pr_debug in rpmh?
> > > > 
> > > > On 7/23/20 2:26 PM, Rajendra Nayak wrote:
> > > > > Add the OPP tables in order to be able to vote on the performance state of
> > > > > a power-domain.
> > > > > 
> > > > > Signed-off-by: Rajendra Nayak <rnayak@codeaurora.org>
> > > > > ---
> > > > >   arch/arm64/boot/dts/qcom/sdm845.dtsi | 40 ++++++++++++++++++++++++++++++++++--
> > > > >   1 file changed, 38 insertions(+), 2 deletions(-)
> > > > > 
> > > > > diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi
> > > > > index e506793..5ca2265 100644
> > > > > --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
> > > > > +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
> > > > > @@ -3631,8 +3631,10 @@
> > > > >               interrupts = <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>;
> > > > >               power-domains = <&videocc VENUS_GDSC>,
> > > > >                       <&videocc VCODEC0_GDSC>,
> > > > > -                    <&videocc VCODEC1_GDSC>;
> > > > > -            power-domain-names = "venus", "vcodec0", "vcodec1";
> > > > > +                    <&videocc VCODEC1_GDSC>,
> > > > > +                    <&rpmhpd SDM845_CX>;
> > > > > +            power-domain-names = "venus", "vcodec0", "vcodec1", "cx";
> > > > > +            operating-points-v2 = <&venus_opp_table>;
> > > > >               clocks = <&videocc VIDEO_CC_VENUS_CTL_CORE_CLK>,
> > > > >                    <&videocc VIDEO_CC_VENUS_AHB_CLK>,
> > > > >                    <&videocc VIDEO_CC_VENUS_CTL_AXI_CLK>,
> > > > > @@ -3654,6 +3656,40 @@
> > > > >               video-core1 {
> > > > >                   compatible = "venus-encoder";
> > > > >               };
> > > > > +
> > > > > +            venus_opp_table: venus-opp-table {
> > > > > +                compatible = "operating-points-v2";
> > > > > +
> > > > > +                opp-100000000 {
> > > > > +                    opp-hz = /bits/ 64 <100000000>;
> > > > > +                    required-opps = <&rpmhpd_opp_min_svs>;
> > > > > +                };
> > > > > +
> > > > > +                opp-200000000 {
> > > > > +                    opp-hz = /bits/ 64 <200000000>;
> > > > > +                    required-opps = <&rpmhpd_opp_low_svs>;
> > > > > +                };
> > > > > +
> > > > > +                opp-320000000 {
> > > > > +                    opp-hz = /bits/ 64 <320000000>;
> > > > > +                    required-opps = <&rpmhpd_opp_svs>;
> > > > > +                };
> > > > > +
> > > > > +                opp-380000000 {
> > > > > +                    opp-hz = /bits/ 64 <380000000>;
> > > > > +                    required-opps = <&rpmhpd_opp_svs_l1>;
> > > > > +                };
> > > > > +
> > > > > +                opp-444000000 {
> > > > > +                    opp-hz = /bits/ 64 <444000000>;
> > > > > +                    required-opps = <&rpmhpd_opp_nom>;
> > > > > +                };
> > > > > +
> > > > > +                opp-533000000 {
> > > > > +                    opp-hz = /bits/ 64 <533000000>;

Is this the highest OPP in table ?

> > > Actually it comes from videocc, where ftbl_video_cc_venus_clk_src
> > > defines 533000000 but the real calculated freq is 533000097.
> > 
> > I still don't quite understand why the videocc driver returns this
> > frequency despite this not being in the freq table.
> 
> Ok, so I see the same issue on sc7180 also. clk_round_rate() does seem to
> return whats in the freq table, but clk_set_rate() goes ahead and sets it
> to 533000097. Subsequently when we try to set a different OPP, it fails to
> find the 'current' OPP entry for 533000097. This sounds like an issue with the OPP
> framework? Should we not fall back to the highest OPP as the current OPP?
> 
> Stephen/Viresh, any thoughts?

I think we (in all frameworks generally) try to set a frequency <=
target frequency and so there may be a problem if the frequency is
larger than highest supported. IOW, you need to fix tables a bit.
Stephen Boyd July 28, 2020, 12:45 a.m. UTC | #12
Quoting Lina Iyer (2020-07-24 09:28:25)
> On Fri, Jul 24 2020 at 03:03 -0600, Rajendra Nayak wrote:
> >Hi Maulik/Lina,
> >
> >On 7/23/2020 11:36 PM, Stanimir Varbanov wrote:
> >>Hi Rajendra,
> >>
> >>After applying 2,3 and 4/5 patches on linaro-integration v5.8-rc2 I see
> >>below messages on db845:
> >>
> >>qcom-venus aa00000.video-codec: dev_pm_opp_set_rate: failed to find
> >>current OPP for freq 533000097 (-34)
> >>
> >>^^^ This one is new.
> >>
> >>qcom_rpmh TCS Busy, retrying RPMH message send: addr=0x30000
> >>
> >>^^^ and this message is annoying, can we make it pr_debug in rpmh?
> >
> How annoyingly often do you see this message?
> Usually, this is an indication of bad system state either on remote
> processors in the SoC or in Linux itself. On a smooth sailing build you
> should not see this 'warning'.
> 
> >Would you be fine with moving this message to a pr_debug? Its currently
> >a pr_info_ratelimited()
> I would rather not, moving this out of sight will mask a lot serious
> issues that otherwise bring attention to the developers.
> 

I removed this warning message in my patch posted to the list[1]. If
it's a serious problem then I suppose a timeout is more appropriate, on
the order of several seconds or so and then a pr_warn() and bail out of
the async call with an error.

[1] https://lore.kernel.org/r/20200724211711.810009-1-sboyd@kernel.org
Stephen Boyd July 28, 2020, 12:52 a.m. UTC | #13
Quoting Viresh Kumar (2020-07-27 08:38:06)
> On 27-07-20, 17:38, Rajendra Nayak wrote:
> > On 7/27/2020 11:23 AM, Rajendra Nayak wrote:
> > > On 7/24/2020 7:39 PM, Stanimir Varbanov wrote:
> > > > > > +
> > > > > > +                opp-533000000 {
> > > > > > +                    opp-hz = /bits/ 64 <533000000>;
> 
> Is this the highest OPP in table ?
> 
> > > > Actually it comes from videocc, where ftbl_video_cc_venus_clk_src
> > > > defines 533000000 but the real calculated freq is 533000097.
> > > 
> > > I still don't quite understand why the videocc driver returns this
> > > frequency despite this not being in the freq table.
> > 
> > Ok, so I see the same issue on sc7180 also. clk_round_rate() does seem to
> > return whats in the freq table, but clk_set_rate() goes ahead and sets it

I'm happy to see clk_round_rate() return the actual rate that would be
achieved and not just the rate that is in the frequency tables. Would
that fix the problem? It may be that we need to make clk_round_rate() do
some more math on qcom platforms and actually figure out what the rate
is going to be instead of blindly trust the frequency that has been set
in the tables.

> > to 533000097. Subsequently when we try to set a different OPP, it fails to
> > find the 'current' OPP entry for 533000097. This sounds like an issue with the OPP
> > framework? Should we not fall back to the highest OPP as the current OPP?
> > 
> > Stephen/Viresh, any thoughts?
> 
> I think we (in all frameworks generally) try to set a frequency <=
> target frequency and so there may be a problem if the frequency is
> larger than highest supported. IOW, you need to fix tables a bit.
> 

Rounding is annoying for sure.
Rajendra Nayak July 28, 2020, 4:17 a.m. UTC | #14
On 7/28/2020 6:22 AM, Stephen Boyd wrote:
> Quoting Viresh Kumar (2020-07-27 08:38:06)
>> On 27-07-20, 17:38, Rajendra Nayak wrote:
>>> On 7/27/2020 11:23 AM, Rajendra Nayak wrote:
>>>> On 7/24/2020 7:39 PM, Stanimir Varbanov wrote:
>>>>>>> +
>>>>>>> +                opp-533000000 {
>>>>>>> +                    opp-hz = /bits/ 64 <533000000>;
>>
>> Is this the highest OPP in table ?
>>
>>>>> Actually it comes from videocc, where ftbl_video_cc_venus_clk_src
>>>>> defines 533000000 but the real calculated freq is 533000097.
>>>>
>>>> I still don't quite understand why the videocc driver returns this
>>>> frequency despite this not being in the freq table.
>>>
>>> Ok, so I see the same issue on sc7180 also. clk_round_rate() does seem to
>>> return whats in the freq table, but clk_set_rate() goes ahead and sets it
> 
> I'm happy to see clk_round_rate() return the actual rate that would be
> achieved and not just the rate that is in the frequency tables. Would
> that fix the problem? 

It would, but only if I also update the OPP table to have 533000097
instead of 533000000 (which I guess is needed anyway)
If this is the actual frequency that's achievable, then perhaps even the clock
freq table should have this? 533000097 and not 533000000?
That way clk_round_rate() would return the actual rate that's achieved and
we don't need any extra math. Isn't that the reason these freq tables exist
anyway.

> It may be that we need to make clk_round_rate() do
> some more math on qcom platforms and actually figure out what the rate
> is going to be instead of blindly trust the frequency that has been set
> in the tables.
> 
>>> to 533000097. Subsequently when we try to set a different OPP, it fails to
>>> find the 'current' OPP entry for 533000097. This sounds like an issue with the OPP
>>> framework? Should we not fall back to the highest OPP as the current OPP?
>>>
>>> Stephen/Viresh, any thoughts?
>>
>> I think we (in all frameworks generally) try to set a frequency <=
>> target frequency and so there may be a problem if the frequency is
>> larger than highest supported. IOW, you need to fix tables a bit.
>>
> 
> Rounding is annoying for sure.
>
Lina Iyer July 28, 2020, 4:52 p.m. UTC | #15
On Mon, Jul 27 2020 at 18:45 -0600, Stephen Boyd wrote:
>Quoting Lina Iyer (2020-07-24 09:28:25)
>> On Fri, Jul 24 2020 at 03:03 -0600, Rajendra Nayak wrote:
>> >Hi Maulik/Lina,
>> >
>> >On 7/23/2020 11:36 PM, Stanimir Varbanov wrote:
>> >>Hi Rajendra,
>> >>
>> >>After applying 2,3 and 4/5 patches on linaro-integration v5.8-rc2 I see
>> >>below messages on db845:
>> >>
>> >>qcom-venus aa00000.video-codec: dev_pm_opp_set_rate: failed to find
>> >>current OPP for freq 533000097 (-34)
>> >>
>> >>^^^ This one is new.
>> >>
>> >>qcom_rpmh TCS Busy, retrying RPMH message send: addr=0x30000
>> >>
>> >>^^^ and this message is annoying, can we make it pr_debug in rpmh?
>> >
>> How annoyingly often do you see this message?
>> Usually, this is an indication of bad system state either on remote
>> processors in the SoC or in Linux itself. On a smooth sailing build you
>> should not see this 'warning'.
>>
>> >Would you be fine with moving this message to a pr_debug? Its currently
>> >a pr_info_ratelimited()
>> I would rather not, moving this out of sight will mask a lot serious
>> issues that otherwise bring attention to the developers.
>>
>
>I removed this warning message in my patch posted to the list[1]. If
>it's a serious problem then I suppose a timeout is more appropriate, on
>the order of several seconds or so and then a pr_warn() and bail out of
>the async call with an error.
>
The warning used to capture issues that happen within a second and it
helps capture system related issues. Timing out after many seconds
overlooks the system issues that generally tend to resolve itself, but
nevertheless need to be investigated.

--Lina

>[1] https://lore.kernel.org/r/20200724211711.810009-1-sboyd@kernel.org
Stephen Boyd July 28, 2020, 7:51 p.m. UTC | #16
Quoting Lina Iyer (2020-07-28 09:52:12)
> On Mon, Jul 27 2020 at 18:45 -0600, Stephen Boyd wrote:
> >Quoting Lina Iyer (2020-07-24 09:28:25)
> >> On Fri, Jul 24 2020 at 03:03 -0600, Rajendra Nayak wrote:
> >> >Hi Maulik/Lina,
> >> >
> >> >On 7/23/2020 11:36 PM, Stanimir Varbanov wrote:
> >> >>Hi Rajendra,
> >> >>
> >> >>After applying 2,3 and 4/5 patches on linaro-integration v5.8-rc2 I see
> >> >>below messages on db845:
> >> >>
> >> >>qcom-venus aa00000.video-codec: dev_pm_opp_set_rate: failed to find
> >> >>current OPP for freq 533000097 (-34)
> >> >>
> >> >>^^^ This one is new.
> >> >>
> >> >>qcom_rpmh TCS Busy, retrying RPMH message send: addr=0x30000
> >> >>
> >> >>^^^ and this message is annoying, can we make it pr_debug in rpmh?
> >> >
> >> How annoyingly often do you see this message?
> >> Usually, this is an indication of bad system state either on remote
> >> processors in the SoC or in Linux itself. On a smooth sailing build you
> >> should not see this 'warning'.
> >>
> >> >Would you be fine with moving this message to a pr_debug? Its currently
> >> >a pr_info_ratelimited()
> >> I would rather not, moving this out of sight will mask a lot serious
> >> issues that otherwise bring attention to the developers.
> >>
> >
> >I removed this warning message in my patch posted to the list[1]. If
> >it's a serious problem then I suppose a timeout is more appropriate, on
> >the order of several seconds or so and then a pr_warn() and bail out of
> >the async call with an error.
> >
> The warning used to capture issues that happen within a second and it
> helps capture system related issues. Timing out after many seconds
> overlooks the system issues that generally tend to resolve itself, but
> nevertheless need to be investigated.
> 

Is it correct to read "system related issues" as performance problems
where the thread is spinning forever trying to send a message and it
can't? So the problem is mostly that it's an unbounded amount of time
before the message is sent to rpmh and this printk helps identify those
situations where that is happening?

Otherwise as you say above it's a bad system state where the rpmh
processor has gotten into a bad state like a crash? Can we recover from
that? Or is the only recovery a reboot of the system? Does the rpmh
processor reboot the system if it crashes?
Stephen Boyd July 28, 2020, 7:54 p.m. UTC | #17
Quoting Rajendra Nayak (2020-07-27 21:17:28)
> 
> On 7/28/2020 6:22 AM, Stephen Boyd wrote:
> > Quoting Viresh Kumar (2020-07-27 08:38:06)
> >> On 27-07-20, 17:38, Rajendra Nayak wrote:
> >>> On 7/27/2020 11:23 AM, Rajendra Nayak wrote:
> >>>> On 7/24/2020 7:39 PM, Stanimir Varbanov wrote:
> >>>>>>> +
> >>>>>>> +                opp-533000000 {
> >>>>>>> +                    opp-hz = /bits/ 64 <533000000>;
> >>
> >> Is this the highest OPP in table ?
> >>
> >>>>> Actually it comes from videocc, where ftbl_video_cc_venus_clk_src
> >>>>> defines 533000000 but the real calculated freq is 533000097.
> >>>>
> >>>> I still don't quite understand why the videocc driver returns this
> >>>> frequency despite this not being in the freq table.
> >>>
> >>> Ok, so I see the same issue on sc7180 also. clk_round_rate() does seem to
> >>> return whats in the freq table, but clk_set_rate() goes ahead and sets it
> > 
> > I'm happy to see clk_round_rate() return the actual rate that would be
> > achieved and not just the rate that is in the frequency tables. Would
> > that fix the problem? 
> 
> It would, but only if I also update the OPP table to have 533000097
> instead of 533000000 (which I guess is needed anyway)
> If this is the actual frequency that's achievable, then perhaps even the clock
> freq table should have this? 533000097 and not 533000000?
> That way clk_round_rate() would return the actual rate that's achieved and
> we don't need any extra math. Isn't that the reason these freq tables exist
> anyway.

Yes the freq tables are there in the clk driver so we don't have to do a
bunch of math. Fixing them to be accurate has been deemed "hard" from
what I recall because the tables are generated from some math function
that truncates the lower Hertz values.
Lina Iyer July 28, 2020, 8:11 p.m. UTC | #18
On Tue, Jul 28 2020 at 13:51 -0600, Stephen Boyd wrote:
>Quoting Lina Iyer (2020-07-28 09:52:12)
>> On Mon, Jul 27 2020 at 18:45 -0600, Stephen Boyd wrote:
>> >Quoting Lina Iyer (2020-07-24 09:28:25)
>> >> On Fri, Jul 24 2020 at 03:03 -0600, Rajendra Nayak wrote:
>> >> >Hi Maulik/Lina,
>> >> >
>> >> >On 7/23/2020 11:36 PM, Stanimir Varbanov wrote:
>> >> >>Hi Rajendra,
>> >> >>
>> >> >>After applying 2,3 and 4/5 patches on linaro-integration v5.8-rc2 I see
>> >> >>below messages on db845:
>> >> >>
>> >> >>qcom-venus aa00000.video-codec: dev_pm_opp_set_rate: failed to find
>> >> >>current OPP for freq 533000097 (-34)
>> >> >>
>> >> >>^^^ This one is new.
>> >> >>
>> >> >>qcom_rpmh TCS Busy, retrying RPMH message send: addr=0x30000
>> >> >>
>> >> >>^^^ and this message is annoying, can we make it pr_debug in rpmh?
>> >> >
>> >> How annoyingly often do you see this message?
>> >> Usually, this is an indication of bad system state either on remote
>> >> processors in the SoC or in Linux itself. On a smooth sailing build you
>> >> should not see this 'warning'.
>> >>
>> >> >Would you be fine with moving this message to a pr_debug? Its currently
>> >> >a pr_info_ratelimited()
>> >> I would rather not, moving this out of sight will mask a lot serious
>> >> issues that otherwise bring attention to the developers.
>> >>
>> >
>> >I removed this warning message in my patch posted to the list[1]. If
>> >it's a serious problem then I suppose a timeout is more appropriate, on
>> >the order of several seconds or so and then a pr_warn() and bail out of
>> >the async call with an error.
>> >
>> The warning used to capture issues that happen within a second and it
>> helps capture system related issues. Timing out after many seconds
>> overlooks the system issues that generally tend to resolve itself, but
>> nevertheless need to be investigated.
>>
>
>Is it correct to read "system related issues" as performance problems
>where the thread is spinning forever trying to send a message and it
>can't? So the problem is mostly that it's an unbounded amount of time
>before the message is sent to rpmh and this printk helps identify those
>situations where that is happening?
>
Yes, but mostly a short period of time like when other processors are in
the middle of a restart or resource states changes have taken unusual
amounts of time. The system will generally recover from this without
crashing in this case. User action is investigation of the situation
leading to these messages.

>Otherwise as you say above it's a bad system state where the rpmh
>processor has gotten into a bad state like a crash? Can we recover from
>that? Or is the only recovery a reboot of the system? Does the rpmh
>processor reboot the system if it crashes?
We cannot recover from such a state. The remote processor will reboot if
it detects a failure at it's end. If the system entered a bad state, it
is possible that RPMH requests start timing out in Linux and remote
processor may not detect it. Hence, the timeout in rpmh_write() API. The
advised course of action is a restart as there is no way to recover from
this state.

--Lina
Doug Anderson July 29, 2020, 6:10 p.m. UTC | #19
Hi,

On Tue, Jul 28, 2020 at 1:11 PM Lina Iyer <ilina@codeaurora.org> wrote:
>
> On Tue, Jul 28 2020 at 13:51 -0600, Stephen Boyd wrote:
> >Quoting Lina Iyer (2020-07-28 09:52:12)
> >> On Mon, Jul 27 2020 at 18:45 -0600, Stephen Boyd wrote:
> >> >Quoting Lina Iyer (2020-07-24 09:28:25)
> >> >> On Fri, Jul 24 2020 at 03:03 -0600, Rajendra Nayak wrote:
> >> >> >Hi Maulik/Lina,
> >> >> >
> >> >> >On 7/23/2020 11:36 PM, Stanimir Varbanov wrote:
> >> >> >>Hi Rajendra,
> >> >> >>
> >> >> >>After applying 2,3 and 4/5 patches on linaro-integration v5.8-rc2 I see
> >> >> >>below messages on db845:
> >> >> >>
> >> >> >>qcom-venus aa00000.video-codec: dev_pm_opp_set_rate: failed to find
> >> >> >>current OPP for freq 533000097 (-34)
> >> >> >>
> >> >> >>^^^ This one is new.
> >> >> >>
> >> >> >>qcom_rpmh TCS Busy, retrying RPMH message send: addr=0x30000
> >> >> >>
> >> >> >>^^^ and this message is annoying, can we make it pr_debug in rpmh?
> >> >> >
> >> >> How annoyingly often do you see this message?
> >> >> Usually, this is an indication of bad system state either on remote
> >> >> processors in the SoC or in Linux itself. On a smooth sailing build you
> >> >> should not see this 'warning'.
> >> >>
> >> >> >Would you be fine with moving this message to a pr_debug? Its currently
> >> >> >a pr_info_ratelimited()
> >> >> I would rather not, moving this out of sight will mask a lot serious
> >> >> issues that otherwise bring attention to the developers.
> >> >>
> >> >
> >> >I removed this warning message in my patch posted to the list[1]. If
> >> >it's a serious problem then I suppose a timeout is more appropriate, on
> >> >the order of several seconds or so and then a pr_warn() and bail out of
> >> >the async call with an error.
> >> >
> >> The warning used to capture issues that happen within a second and it
> >> helps capture system related issues. Timing out after many seconds
> >> overlooks the system issues that generally tend to resolve itself, but
> >> nevertheless need to be investigated.
> >>
> >
> >Is it correct to read "system related issues" as performance problems
> >where the thread is spinning forever trying to send a message and it
> >can't? So the problem is mostly that it's an unbounded amount of time
> >before the message is sent to rpmh and this printk helps identify those
> >situations where that is happening?
> >
> Yes, but mostly a short period of time like when other processors are in
> the middle of a restart or resource states changes have taken unusual
> amounts of time. The system will generally recover from this without
> crashing in this case. User action is investigation of the situation
> leading to these messages.

While I do agree that seeing the "TCS Busy, retrying RPMH message
send" message printed a lot was usually a sign that something was
wrong in the system (possibly someone was spamming RPMh when they
shouldn't be), it still feels like we need to remove it.
Specifically, the prints would also sometimes come up in normal usage
and always sounded a bit scary.  These types of prints always confuse
people and lead to log pollution where it's super hard to figure out
which of the various things in a log are "expected" and which ones are
relevant to whatever issue you're debugging.

Presumably we could either change that from a "info" level to "dbg"
level.  ...or we could find some other thing to check for that's a
better signal of problems.


> >Otherwise as you say above it's a bad system state where the rpmh
> >processor has gotten into a bad state like a crash? Can we recover from
> >that? Or is the only recovery a reboot of the system? Does the rpmh
> >processor reboot the system if it crashes?
> We cannot recover from such a state. The remote processor will reboot if
> it detects a failure at it's end. If the system entered a bad state, it
> is possible that RPMH requests start timing out in Linux and remote
> processor may not detect it. Hence, the timeout in rpmh_write() API. The
> advised course of action is a restart as there is no way to recover from
> this state.
>
> --Lina
>
>
Bjorn Andersson July 29, 2020, 8:38 p.m. UTC | #20
On Tue 28 Jul 13:11 PDT 2020, Lina Iyer wrote:

> On Tue, Jul 28 2020 at 13:51 -0600, Stephen Boyd wrote:
> > Quoting Lina Iyer (2020-07-28 09:52:12)
> > > On Mon, Jul 27 2020 at 18:45 -0600, Stephen Boyd wrote:
> > > >Quoting Lina Iyer (2020-07-24 09:28:25)
> > > >> On Fri, Jul 24 2020 at 03:03 -0600, Rajendra Nayak wrote:
> > > >> >Hi Maulik/Lina,
> > > >> >
> > > >> >On 7/23/2020 11:36 PM, Stanimir Varbanov wrote:
> > > >> >>Hi Rajendra,
> > > >> >>
> > > >> >>After applying 2,3 and 4/5 patches on linaro-integration v5.8-rc2 I see
> > > >> >>below messages on db845:
> > > >> >>
> > > >> >>qcom-venus aa00000.video-codec: dev_pm_opp_set_rate: failed to find
> > > >> >>current OPP for freq 533000097 (-34)
> > > >> >>
> > > >> >>^^^ This one is new.
> > > >> >>
> > > >> >>qcom_rpmh TCS Busy, retrying RPMH message send: addr=0x30000
> > > >> >>
> > > >> >>^^^ and this message is annoying, can we make it pr_debug in rpmh?
> > > >> >
> > > >> How annoyingly often do you see this message?
> > > >> Usually, this is an indication of bad system state either on remote
> > > >> processors in the SoC or in Linux itself. On a smooth sailing build you
> > > >> should not see this 'warning'.
> > > >>
> > > >> >Would you be fine with moving this message to a pr_debug? Its currently
> > > >> >a pr_info_ratelimited()
> > > >> I would rather not, moving this out of sight will mask a lot serious
> > > >> issues that otherwise bring attention to the developers.
> > > >>
> > > >
> > > >I removed this warning message in my patch posted to the list[1]. If
> > > >it's a serious problem then I suppose a timeout is more appropriate, on
> > > >the order of several seconds or so and then a pr_warn() and bail out of
> > > >the async call with an error.
> > > >
> > > The warning used to capture issues that happen within a second and it
> > > helps capture system related issues. Timing out after many seconds
> > > overlooks the system issues that generally tend to resolve itself, but
> > > nevertheless need to be investigated.
> > > 
> > 
> > Is it correct to read "system related issues" as performance problems
> > where the thread is spinning forever trying to send a message and it
> > can't? So the problem is mostly that it's an unbounded amount of time
> > before the message is sent to rpmh and this printk helps identify those
> > situations where that is happening?
> > 
> Yes, but mostly a short period of time like when other processors are in
> the middle of a restart or resource states changes have taken unusual
> amounts of time. The system will generally recover from this without
> crashing in this case. User action is investigation of the situation
> leading to these messages.
> 

Given that these messages shows up from time and seemingly is harmless,
users such as myself implements the action of ignoring these printouts.

In the cases I do see these messages it seems, as you say, to be related
to something happening in the firmware. So it's not something that a
user typically could investigate/debug anyways.


As such I do second Doug's request of not printing what looks like error
messages unless there is a persistent problem - but provide some means
for the few who would find them useful..

Regards,
Bjorn

> > Otherwise as you say above it's a bad system state where the rpmh
> > processor has gotten into a bad state like a crash? Can we recover from
> > that? Or is the only recovery a reboot of the system? Does the rpmh
> > processor reboot the system if it crashes?
> We cannot recover from such a state. The remote processor will reboot if
> it detects a failure at it's end. If the system entered a bad state, it
> is possible that RPMH requests start timing out in Linux and remote
> processor may not detect it. Hence, the timeout in rpmh_write() API. The
> advised course of action is a restart as there is no way to recover from
> this state.
> 
> --Lina
> 
>
diff mbox series

Patch

diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index e506793..5ca2265 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -3631,8 +3631,10 @@ 
 			interrupts = <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>;
 			power-domains = <&videocc VENUS_GDSC>,
 					<&videocc VCODEC0_GDSC>,
-					<&videocc VCODEC1_GDSC>;
-			power-domain-names = "venus", "vcodec0", "vcodec1";
+					<&videocc VCODEC1_GDSC>,
+					<&rpmhpd SDM845_CX>;
+			power-domain-names = "venus", "vcodec0", "vcodec1", "cx";
+			operating-points-v2 = <&venus_opp_table>;
 			clocks = <&videocc VIDEO_CC_VENUS_CTL_CORE_CLK>,
 				 <&videocc VIDEO_CC_VENUS_AHB_CLK>,
 				 <&videocc VIDEO_CC_VENUS_CTL_AXI_CLK>,
@@ -3654,6 +3656,40 @@ 
 			video-core1 {
 				compatible = "venus-encoder";
 			};
+
+			venus_opp_table: venus-opp-table {
+				compatible = "operating-points-v2";
+
+				opp-100000000 {
+					opp-hz = /bits/ 64 <100000000>;
+					required-opps = <&rpmhpd_opp_min_svs>;
+				};
+
+				opp-200000000 {
+					opp-hz = /bits/ 64 <200000000>;
+					required-opps = <&rpmhpd_opp_low_svs>;
+				};
+
+				opp-320000000 {
+					opp-hz = /bits/ 64 <320000000>;
+					required-opps = <&rpmhpd_opp_svs>;
+				};
+
+				opp-380000000 {
+					opp-hz = /bits/ 64 <380000000>;
+					required-opps = <&rpmhpd_opp_svs_l1>;
+				};
+
+				opp-444000000 {
+					opp-hz = /bits/ 64 <444000000>;
+					required-opps = <&rpmhpd_opp_nom>;
+				};
+
+				opp-533000000 {
+					opp-hz = /bits/ 64 <533000000>;
+					required-opps = <&rpmhpd_opp_turbo>;
+				};
+			};
 		};
 
 		videocc: clock-controller@ab00000 {