diff mbox series

[v3] clk: sunxi-ng: sun50i: h6: Modify GPU clock configuration to support DFS

Message ID 20220705075226.359475-1-r.stratiienko@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v3] clk: sunxi-ng: sun50i: h6: Modify GPU clock configuration to support DFS | expand

Commit Message

Roman Stratiienko July 5, 2022, 7:52 a.m. UTC
Using simple bash script it was discovered that not all CCU registers
can be safely used for DFS, e.g.:

    while true
    do
        devmem 0x3001030 4 0xb0003e02
        devmem 0x3001030 4 0xb0001e02
    done

Script above changes the GPU_PLL multiplier register value. While the
script is running, the user should interact with the user interface.

Using this method the following results were obtained:

| Register  | Name           | Bits  | Values | Result |
| --        | --             | --    | --     | --     |
| 0x3001030 | GPU_PLL.MULT   | 15..8 | 20-62  | OK     |
| 0x3001030 | GPU_PLL.INDIV  |     1 | 0-1    | OK     |
| 0x3001030 | GPU_PLL.OUTDIV |     0 | 0-1    | FAIL   |
| 0x3001670 | GPU_CLK.DIV    |  3..0 | ANY    | FAIL   |

DVFS started to work seamlessly once dividers which caused the
glitches were set to fixed values.

Signed-off-by: Roman Stratiienko <r.stratiienko@gmail.com>

---

Changelog:

V2:
- Drop changes related to mux
- Drop frequency limiting
- Add unused dividers initialization

V3:
- Adjust comments
---
 drivers/clk/sunxi-ng/ccu-sun50i-h6.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

Comments

Jernej Škrabec July 5, 2022, 4:07 p.m. UTC | #1
Hi Roman,

Dne torek, 05. julij 2022 ob 09:52:26 CEST je Roman Stratiienko napisal(a):
> Using simple bash script it was discovered that not all CCU registers
> can be safely used for DFS, e.g.:
> 
>     while true
>     do
>         devmem 0x3001030 4 0xb0003e02
>         devmem 0x3001030 4 0xb0001e02
>     done
> 
> Script above changes the GPU_PLL multiplier register value. While the
> script is running, the user should interact with the user interface.
> 
> Using this method the following results were obtained:
> | Register  | Name           | Bits  | Values | Result |
> | --        | --             | --    | --     | --     |
> | 0x3001030 | GPU_PLL.MULT   | 15..8 | 20-62  | OK     |
> | 0x3001030 | GPU_PLL.INDIV  |     1 | 0-1    | OK     |
> | 0x3001030 | GPU_PLL.OUTDIV |     0 | 0-1    | FAIL   |
> | 0x3001670 | GPU_CLK.DIV    |  3..0 | ANY    | FAIL   |
> 
> DVFS started to work seamlessly once dividers which caused the
> glitches were set to fixed values.
> 
> Signed-off-by: Roman Stratiienko <r.stratiienko@gmail.com>
> 
> ---
> 
> Changelog:
> 
> V2:
> - Drop changes related to mux
> - Drop frequency limiting
> - Add unused dividers initialization
> 
> V3:
> - Adjust comments

I don't see any comment fixed, at least not to "1", as we discussed. Did I miss 
anything? Also, please add min and max. I also consent to R-B, which you 
didn't include. Did you resend v2 instead of v3?

Best regards,
Jernej

> ---
>  drivers/clk/sunxi-ng/ccu-sun50i-h6.c | 16 +++++++++++++---
>  1 file changed, 13 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
> b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c index 2ddf0a0da526f..068d1a6b2ebf3
> 100644
> --- a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
> +++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
> @@ -95,13 +95,13 @@ static struct ccu_nkmp pll_periph1_clk = {
>  	},
>  };
> 
> +/* For GPU PLL, using an output divider for DFS causes system to fail */
>  #define SUN50I_H6_PLL_GPU_REG		0x030
>  static struct ccu_nkmp pll_gpu_clk = {
>  	.enable		= BIT(31),
>  	.lock		= BIT(28),
>  	.n		= _SUNXI_CCU_MULT_MIN(8, 8, 12),
>  	.m		= _SUNXI_CCU_DIV(1, 1), /* input divider */
> -	.p		= _SUNXI_CCU_DIV(0, 1), /* output divider 
*/
>  	.common		= {
>  		.reg		= 0x030,
>  		.hw.init	= CLK_HW_INIT("pll-gpu", "osc24M",
> @@ -294,9 +294,9 @@ static SUNXI_CCU_M_WITH_MUX_GATE(deinterlace_clk,
> "deinterlace", static SUNXI_CCU_GATE(bus_deinterlace_clk,
> "bus-deinterlace", "psi-ahb1-ahb2", 0x62c, BIT(0), 0);
> 
> +/* Keep GPU_CLK divider const to avoid DFS instability. */
>  static const char * const gpu_parents[] = { "pll-gpu" };
> -static SUNXI_CCU_M_WITH_MUX_GATE(gpu_clk, "gpu", gpu_parents, 0x670,
> -				       0, 3,	/* M */
> +static SUNXI_CCU_MUX_WITH_GATE(gpu_clk, "gpu", gpu_parents, 0x670,
>  				       24, 1,	/* mux */
>  				       BIT(31),	/* gate */
>  				       CLK_SET_RATE_PARENT);
> @@ -1193,6 +1193,16 @@ static int sun50i_h6_ccu_probe(struct platform_device
> *pdev) if (IS_ERR(reg))
>  		return PTR_ERR(reg);
> 
> +	/* Force PLL_GPU output divider bits to 0 */
> +	val = readl(reg + SUN50I_H6_PLL_GPU_REG);
> +	val &= ~BIT(0);
> +	writel(val, reg + SUN50I_H6_PLL_GPU_REG);
> +
> +	/* Force GPU_CLK divider bits to 0 */
> +	val = readl(reg + gpu_clk.common.reg);
> +	val &= ~GENMASK(3, 0);
> +	writel(val, reg + gpu_clk.common.reg);
> +
>  	/* Enable the lock bits on all PLLs */
>  	for (i = 0; i < ARRAY_SIZE(pll_regs); i++) {
>  		val = readl(reg + pll_regs[i]);
> --
> 2.34.1
Roman Stratiienko July 5, 2022, 4:29 p.m. UTC | #2
Hi Jernej,

вт, 5 июл. 2022 г. в 19:07, Jernej Škrabec <jernej.skrabec@gmail.com>:
>
> Hi Roman,
>
> Dne torek, 05. julij 2022 ob 09:52:26 CEST je Roman Stratiienko napisal(a):
> > Using simple bash script it was discovered that not all CCU registers
> > can be safely used for DFS, e.g.:
> >
> >     while true
> >     do
> >         devmem 0x3001030 4 0xb0003e02
> >         devmem 0x3001030 4 0xb0001e02
> >     done
> >
> > Script above changes the GPU_PLL multiplier register value. While the
> > script is running, the user should interact with the user interface.
> >
> > Using this method the following results were obtained:
> > | Register  | Name           | Bits  | Values | Result |
> > | --        | --             | --    | --     | --     |
> > | 0x3001030 | GPU_PLL.MULT   | 15..8 | 20-62  | OK     |
> > | 0x3001030 | GPU_PLL.INDIV  |     1 | 0-1    | OK     |
> > | 0x3001030 | GPU_PLL.OUTDIV |     0 | 0-1    | FAIL   |
> > | 0x3001670 | GPU_CLK.DIV    |  3..0 | ANY    | FAIL   |
> >
> > DVFS started to work seamlessly once dividers which caused the
> > glitches were set to fixed values.
> >
> > Signed-off-by: Roman Stratiienko <r.stratiienko@gmail.com>
> >
> > ---
> >
> > Changelog:
> >
> > V2:
> > - Drop changes related to mux
> > - Drop frequency limiting
> > - Add unused dividers initialization
> >
> > V3:
> > - Adjust comments
>
> I don't see any comment fixed, at least not to "1", as we discussed. Did I miss
> anything?

I've added the "bits" word, so now it should sound correct.

> Also, please add min and max.

What is the rationale for additional limits?
CPU_PLL doesn't have these limits. I don't want to make them different.

> I also consent to R-B, which you
> didn't include.

I was expecting an explicit 'review-by' line. Anyway I can add it and
resend v4 if it's necessary.

Regards,
Roman

> Did you resend v2 instead of v3?
>
> Best regards,
> Jernej
>
> > ---
> >  drivers/clk/sunxi-ng/ccu-sun50i-h6.c | 16 +++++++++++++---
> >  1 file changed, 13 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
> > b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c index 2ddf0a0da526f..068d1a6b2ebf3
> > 100644
> > --- a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
> > +++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
> > @@ -95,13 +95,13 @@ static struct ccu_nkmp pll_periph1_clk = {
> >       },
> >  };
> >
> > +/* For GPU PLL, using an output divider for DFS causes system to fail */
> >  #define SUN50I_H6_PLL_GPU_REG                0x030
> >  static struct ccu_nkmp pll_gpu_clk = {
> >       .enable         = BIT(31),
> >       .lock           = BIT(28),
> >       .n              = _SUNXI_CCU_MULT_MIN(8, 8, 12),
> >       .m              = _SUNXI_CCU_DIV(1, 1), /* input divider */
> > -     .p              = _SUNXI_CCU_DIV(0, 1), /* output divider
> */
> >       .common         = {
> >               .reg            = 0x030,
> >               .hw.init        = CLK_HW_INIT("pll-gpu", "osc24M",
> > @@ -294,9 +294,9 @@ static SUNXI_CCU_M_WITH_MUX_GATE(deinterlace_clk,
> > "deinterlace", static SUNXI_CCU_GATE(bus_deinterlace_clk,
> > "bus-deinterlace", "psi-ahb1-ahb2", 0x62c, BIT(0), 0);
> >
> > +/* Keep GPU_CLK divider const to avoid DFS instability. */
> >  static const char * const gpu_parents[] = { "pll-gpu" };
> > -static SUNXI_CCU_M_WITH_MUX_GATE(gpu_clk, "gpu", gpu_parents, 0x670,
> > -                                    0, 3,    /* M */
> > +static SUNXI_CCU_MUX_WITH_GATE(gpu_clk, "gpu", gpu_parents, 0x670,
> >                                      24, 1,   /* mux */
> >                                      BIT(31), /* gate */
> >                                      CLK_SET_RATE_PARENT);
> > @@ -1193,6 +1193,16 @@ static int sun50i_h6_ccu_probe(struct platform_device
> > *pdev) if (IS_ERR(reg))
> >               return PTR_ERR(reg);
> >
> > +     /* Force PLL_GPU output divider bits to 0 */
> > +     val = readl(reg + SUN50I_H6_PLL_GPU_REG);
> > +     val &= ~BIT(0);
> > +     writel(val, reg + SUN50I_H6_PLL_GPU_REG);
> > +
> > +     /* Force GPU_CLK divider bits to 0 */
> > +     val = readl(reg + gpu_clk.common.reg);
> > +     val &= ~GENMASK(3, 0);
> > +     writel(val, reg + gpu_clk.common.reg);
> > +
> >       /* Enable the lock bits on all PLLs */
> >       for (i = 0; i < ARRAY_SIZE(pll_regs); i++) {
> >               val = readl(reg + pll_regs[i]);
> > --
> > 2.34.1
>
>
Jernej Škrabec July 5, 2022, 6:06 p.m. UTC | #3
Dne torek, 05. julij 2022 ob 18:29:39 CEST je Roman Stratiienko napisal(a):
> Hi Jernej,
> 
> вт, 5 июл. 2022 г. в 19:07, Jernej Škrabec <jernej.skrabec@gmail.com>:
> > Hi Roman,
> > 
> > Dne torek, 05. julij 2022 ob 09:52:26 CEST je Roman Stratiienko 
napisal(a):
> > > Using simple bash script it was discovered that not all CCU registers
> > > 
> > > can be safely used for DFS, e.g.:
> > >     while true
> > >     do
> > >     
> > >         devmem 0x3001030 4 0xb0003e02
> > >         devmem 0x3001030 4 0xb0001e02
> > >     
> > >     done
> > > 
> > > Script above changes the GPU_PLL multiplier register value. While the
> > > script is running, the user should interact with the user interface.
> > > 
> > > Using this method the following results were obtained:
> > > | Register  | Name           | Bits  | Values | Result |
> > > | --        | --             | --    | --     | --     |
> > > | 0x3001030 | GPU_PLL.MULT   | 15..8 | 20-62  | OK     |
> > > | 0x3001030 | GPU_PLL.INDIV  |     1 | 0-1    | OK     |
> > > | 0x3001030 | GPU_PLL.OUTDIV |     0 | 0-1    | FAIL   |
> > > | 0x3001670 | GPU_CLK.DIV    |  3..0 | ANY    | FAIL   |
> > > 
> > > DVFS started to work seamlessly once dividers which caused the
> > > glitches were set to fixed values.
> > > 
> > > Signed-off-by: Roman Stratiienko <r.stratiienko@gmail.com>
> > > 
> > > ---
> > > 
> > > Changelog:
> > > 
> > > V2:
> > > - Drop changes related to mux
> > > - Drop frequency limiting
> > > - Add unused dividers initialization
> > > 
> > > V3:
> > > - Adjust comments
> > 
> > I don't see any comment fixed, at least not to "1", as we discussed. Did I
> > miss anything?
> 
> I've added the "bits" word, so now it should sound correct.

Technically it's correct, but this would be third form of comments for fixed 
bits. Let's stick to the form which is most informative ("Force PLL_GPU output 
divider to 1"). Ideally, comment would also point to gpu_clk comment for 
reason why, like it's done for video PLL block already.

> 
> > Also, please add min and max.
> 
> What is the rationale for additional limits?

If limits are specified in whatever form, they should be added. As I said 
several times already, vendor code limits PLL frequency to 288 MHz minimum and 
lists maximum. As experienced a few times before with video PLLs, these are 
important, otherwise PLL is unstable. For example, OPP table in vendor DT has 
two operating points lower than 288 MHz, which means it would either lock up 
or be unstable. In such cases, vendor code actually sets GPU_CLK divider to 2, 
but we can skip them, because GPU_CLK divider will be hardcoded to 1 with this 
patch.

> CPU_PLL doesn't have these limits. I don't want to make them different.

Why CPU_PLL? Why not video PLL? In any case, it doesn't matter if struct looks 
similar to some other or is unique. Only important thing is that struct 
describes PLL as best as possible.

> 
> > I also consent to R-B, which you
> > didn't include.
> 
> I was expecting an explicit 'review-by' line. Anyway I can add it and
> resend v4 if it's necessary.

If you at least add min and max limits, you can add following tag:
Reviewed-by: Jernej Skrabec <jernej.skrabec@gmail.com>

If you send it before Friday, it will be in 5.20.

Best regards,
Jernej

> 
> Regards,
> Roman
> 
> > Did you resend v2 instead of v3?
> > 
> > Best regards,
> > Jernej
> > 
> > > ---
> > > 
> > >  drivers/clk/sunxi-ng/ccu-sun50i-h6.c | 16 +++++++++++++---
> > >  1 file changed, 13 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
> > > b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c index
> > > 2ddf0a0da526f..068d1a6b2ebf3
> > > 100644
> > > --- a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
> > > +++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
> > > @@ -95,13 +95,13 @@ static struct ccu_nkmp pll_periph1_clk = {
> > > 
> > >       },
> > >  
> > >  };
> > > 
> > > +/* For GPU PLL, using an output divider for DFS causes system to fail
> > > */
> > > 
> > >  #define SUN50I_H6_PLL_GPU_REG                0x030
> > >  static struct ccu_nkmp pll_gpu_clk = {
> > >  
> > >       .enable         = BIT(31),
> > >       .lock           = BIT(28),
> > >       .n              = _SUNXI_CCU_MULT_MIN(8, 8, 12),
> > >       .m              = _SUNXI_CCU_DIV(1, 1), /* input divider */
> > > 
> > > -     .p              = _SUNXI_CCU_DIV(0, 1), /* output divider
> > 
> > */
> > 
> > >       .common         = {
> > >       
> > >               .reg            = 0x030,
> > >               .hw.init        = CLK_HW_INIT("pll-gpu", "osc24M",
> > > 
> > > @@ -294,9 +294,9 @@ static SUNXI_CCU_M_WITH_MUX_GATE(deinterlace_clk,
> > > "deinterlace", static SUNXI_CCU_GATE(bus_deinterlace_clk,
> > > "bus-deinterlace", "psi-ahb1-ahb2", 0x62c, BIT(0), 0);
> > > 
> > > +/* Keep GPU_CLK divider const to avoid DFS instability. */
> > > 
> > >  static const char * const gpu_parents[] = { "pll-gpu" };
> > > 
> > > -static SUNXI_CCU_M_WITH_MUX_GATE(gpu_clk, "gpu", gpu_parents, 0x670,
> > > -                                    0, 3,    /* M */
> > > +static SUNXI_CCU_MUX_WITH_GATE(gpu_clk, "gpu", gpu_parents, 0x670,
> > > 
> > >                                      24, 1,   /* mux */
> > >                                      BIT(31), /* gate */
> > >                                      CLK_SET_RATE_PARENT);
> > > 
> > > @@ -1193,6 +1193,16 @@ static int sun50i_h6_ccu_probe(struct
> > > platform_device *pdev) if (IS_ERR(reg))
> > > 
> > >               return PTR_ERR(reg);
> > > 
> > > +     /* Force PLL_GPU output divider bits to 0 */
> > > +     val = readl(reg + SUN50I_H6_PLL_GPU_REG);
> > > +     val &= ~BIT(0);
> > > +     writel(val, reg + SUN50I_H6_PLL_GPU_REG);
> > > +
> > > +     /* Force GPU_CLK divider bits to 0 */
> > > +     val = readl(reg + gpu_clk.common.reg);
> > > +     val &= ~GENMASK(3, 0);
> > > +     writel(val, reg + gpu_clk.common.reg);
> > > +
> > > 
> > >       /* Enable the lock bits on all PLLs */
> > >       for (i = 0; i < ARRAY_SIZE(pll_regs); i++) {
> > >       
> > >               val = readl(reg + pll_regs[i]);
> > > 
> > > --
> > > 2.34.1
Roman Stratiienko July 6, 2022, 9:46 a.m. UTC | #4
вт, 5 июл. 2022 г. в 21:07, Jernej Škrabec <jernej.skrabec@gmail.com>:
>
> Dne torek, 05. julij 2022 ob 18:29:39 CEST je Roman Stratiienko napisal(a):
> > Hi Jernej,
> >
> > вт, 5 июл. 2022 г. в 19:07, Jernej Škrabec <jernej.skrabec@gmail.com>:
> > > Hi Roman,
> > >
> > > Dne torek, 05. julij 2022 ob 09:52:26 CEST je Roman Stratiienko
> napisal(a):
> > > > Using simple bash script it was discovered that not all CCU registers
> > > >
> > > > can be safely used for DFS, e.g.:
> > > >     while true
> > > >     do
> > > >
> > > >         devmem 0x3001030 4 0xb0003e02
> > > >         devmem 0x3001030 4 0xb0001e02
> > > >
> > > >     done
> > > >
> > > > Script above changes the GPU_PLL multiplier register value. While the
> > > > script is running, the user should interact with the user interface.
> > > >
> > > > Using this method the following results were obtained:
> > > > | Register  | Name           | Bits  | Values | Result |
> > > > | --        | --             | --    | --     | --     |
> > > > | 0x3001030 | GPU_PLL.MULT   | 15..8 | 20-62  | OK     |
> > > > | 0x3001030 | GPU_PLL.INDIV  |     1 | 0-1    | OK     |
> > > > | 0x3001030 | GPU_PLL.OUTDIV |     0 | 0-1    | FAIL   |
> > > > | 0x3001670 | GPU_CLK.DIV    |  3..0 | ANY    | FAIL   |
> > > >
> > > > DVFS started to work seamlessly once dividers which caused the
> > > > glitches were set to fixed values.
> > > >
> > > > Signed-off-by: Roman Stratiienko <r.stratiienko@gmail.com>
> > > >
> > > > ---
> > > >
> > > > Changelog:
> > > >
> > > > V2:
> > > > - Drop changes related to mux
> > > > - Drop frequency limiting
> > > > - Add unused dividers initialization
> > > >
> > > > V3:
> > > > - Adjust comments
> > >
> > > I don't see any comment fixed, at least not to "1", as we discussed. Did I
> > > miss anything?
> >
> > I've added the "bits" word, so now it should sound correct.
>
> Technically it's correct, but this would be third form of comments for fixed
> bits. Let's stick to the form which is most informative ("Force PLL_GPU output
> divider to 1"). Ideally, comment would also point to gpu_clk comment for
> reason why, like it's done for video PLL block already.
>
> >
> > > Also, please add min and max.
> >
> > What is the rationale for additional limits?
>
> If limits are specified in whatever form, they should be added. As I said
> several times already, vendor code limits PLL frequency to 288 MHz minimum and
> lists maximum. As experienced a few times before with video PLLs, these are
> important, otherwise PLL is unstable. For example, OPP table in vendor DT has
> two operating points lower than 288 MHz, which means it would either lock up
> or be unstable. In such cases, vendor code actually sets GPU_CLK divider to 2,
> but we can skip them, because GPU_CLK divider will be hardcoded to 1 with this
> patch.

What is the rationale behind vendor's freq limitation?

There's no min_rate field in ccu_nkmp. After I changed it to ccu_nm
and set limits, the system started to behave unstable with a lot of
messages in dmesg:

[   40.089091] panfrost 1800000.gpu: _generic_set_opp_clk_only: failed
to set clock rate: -22
[   40.097698] devfreq 1800000.gpu: dvfs failed with (-22) error

From the other end I have no issues so far with the current version
and I have a lot of other work to do.
I think it's a good point to stop any further improvements until
testing results show any issues with the current version.

> > CPU_PLL doesn't have these limits. I don't want to make them different.
>
> Why CPU_PLL?

According to the H6 usermanual only CPU and GPU PLLs support smooth
clock transition during DFS.

Regards,
Roman.

> Why not video PLL? In any case, it doesn't matter if struct looks
> similar to some other or is unique. Only important thing is that struct
> describes PLL as best as possible.
>
> >
> > > I also consent to R-B, which you
> > > didn't include.
> >
> > I was expecting an explicit 'review-by' line. Anyway I can add it and
> > resend v4 if it's necessary.
>
> If you at least add min and max limits, you can add following tag:
> Reviewed-by: Jernej Skrabec <jernej.skrabec@gmail.com>
>
> If you send it before Friday, it will be in 5.20.
>
> Best regards,
> Jernej
>
> >
> > Regards,
> > Roman
> >
> > > Did you resend v2 instead of v3?
> > >
> > > Best regards,
> > > Jernej
> > >
> > > > ---
> > > >
> > > >  drivers/clk/sunxi-ng/ccu-sun50i-h6.c | 16 +++++++++++++---
> > > >  1 file changed, 13 insertions(+), 3 deletions(-)
> > > >
> > > > diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
> > > > b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c index
> > > > 2ddf0a0da526f..068d1a6b2ebf3
> > > > 100644
> > > > --- a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
> > > > +++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
> > > > @@ -95,13 +95,13 @@ static struct ccu_nkmp pll_periph1_clk = {
> > > >
> > > >       },
> > > >
> > > >  };
> > > >
> > > > +/* For GPU PLL, using an output divider for DFS causes system to fail
> > > > */
> > > >
> > > >  #define SUN50I_H6_PLL_GPU_REG                0x030
> > > >  static struct ccu_nkmp pll_gpu_clk = {
> > > >
> > > >       .enable         = BIT(31),
> > > >       .lock           = BIT(28),
> > > >       .n              = _SUNXI_CCU_MULT_MIN(8, 8, 12),
> > > >       .m              = _SUNXI_CCU_DIV(1, 1), /* input divider */
> > > >
> > > > -     .p              = _SUNXI_CCU_DIV(0, 1), /* output divider
> > >
> > > */
> > >
> > > >       .common         = {
> > > >
> > > >               .reg            = 0x030,
> > > >               .hw.init        = CLK_HW_INIT("pll-gpu", "osc24M",
> > > >
> > > > @@ -294,9 +294,9 @@ static SUNXI_CCU_M_WITH_MUX_GATE(deinterlace_clk,
> > > > "deinterlace", static SUNXI_CCU_GATE(bus_deinterlace_clk,
> > > > "bus-deinterlace", "psi-ahb1-ahb2", 0x62c, BIT(0), 0);
> > > >
> > > > +/* Keep GPU_CLK divider const to avoid DFS instability. */
> > > >
> > > >  static const char * const gpu_parents[] = { "pll-gpu" };
> > > >
> > > > -static SUNXI_CCU_M_WITH_MUX_GATE(gpu_clk, "gpu", gpu_parents, 0x670,
> > > > -                                    0, 3,    /* M */
> > > > +static SUNXI_CCU_MUX_WITH_GATE(gpu_clk, "gpu", gpu_parents, 0x670,
> > > >
> > > >                                      24, 1,   /* mux */
> > > >                                      BIT(31), /* gate */
> > > >                                      CLK_SET_RATE_PARENT);
> > > >
> > > > @@ -1193,6 +1193,16 @@ static int sun50i_h6_ccu_probe(struct
> > > > platform_device *pdev) if (IS_ERR(reg))
> > > >
> > > >               return PTR_ERR(reg);
> > > >
> > > > +     /* Force PLL_GPU output divider bits to 0 */
> > > > +     val = readl(reg + SUN50I_H6_PLL_GPU_REG);
> > > > +     val &= ~BIT(0);
> > > > +     writel(val, reg + SUN50I_H6_PLL_GPU_REG);
> > > > +
> > > > +     /* Force GPU_CLK divider bits to 0 */
> > > > +     val = readl(reg + gpu_clk.common.reg);
> > > > +     val &= ~GENMASK(3, 0);
> > > > +     writel(val, reg + gpu_clk.common.reg);
> > > > +
> > > >
> > > >       /* Enable the lock bits on all PLLs */
> > > >       for (i = 0; i < ARRAY_SIZE(pll_regs); i++) {
> > > >
> > > >               val = readl(reg + pll_regs[i]);
> > > >
> > > > --
> > > > 2.34.1
>
>
Jernej Škrabec July 8, 2022, 4:57 a.m. UTC | #5
Hi Roman,

Dne sreda, 06. julij 2022 ob 11:46:13 CEST je Roman Stratiienko napisal(a):
> вт, 5 июл. 2022 г. в 21:07, Jernej Škrabec <jernej.skrabec@gmail.com>:
> > Dne torek, 05. julij 2022 ob 18:29:39 CEST je Roman Stratiienko 
napisal(a):
> > > Hi Jernej,
> > > 
> > > вт, 5 июл. 2022 г. в 19:07, Jernej Škrabec <jernej.skrabec@gmail.com>:
> > > > Hi Roman,
> > > > 
> > > > Dne torek, 05. julij 2022 ob 09:52:26 CEST je Roman Stratiienko
> > 
> > napisal(a):
> > > > > Using simple bash script it was discovered that not all CCU
> > > > > registers
> > > > > 
> > > > > can be safely used for DFS, e.g.:
> > > > >     while true
> > > > >     do
> > > > >     
> > > > >         devmem 0x3001030 4 0xb0003e02
> > > > >         devmem 0x3001030 4 0xb0001e02
> > > > >     
> > > > >     done
> > > > > 
> > > > > Script above changes the GPU_PLL multiplier register value. While
> > > > > the
> > > > > script is running, the user should interact with the user interface.
> > > > > 
> > > > > Using this method the following results were obtained:
> > > > > | Register  | Name           | Bits  | Values | Result |
> > > > > | --        | --             | --    | --     | --     |
> > > > > | 0x3001030 | GPU_PLL.MULT   | 15..8 | 20-62  | OK     |
> > > > > | 0x3001030 | GPU_PLL.INDIV  |     1 | 0-1    | OK     |
> > > > > | 0x3001030 | GPU_PLL.OUTDIV |     0 | 0-1    | FAIL   |
> > > > > | 0x3001670 | GPU_CLK.DIV    |  3..0 | ANY    | FAIL   |
> > > > > 
> > > > > DVFS started to work seamlessly once dividers which caused the
> > > > > glitches were set to fixed values.
> > > > > 
> > > > > Signed-off-by: Roman Stratiienko <r.stratiienko@gmail.com>
> > > > > 
> > > > > ---
> > > > > 
> > > > > Changelog:
> > > > > 
> > > > > V2:
> > > > > - Drop changes related to mux
> > > > > - Drop frequency limiting
> > > > > - Add unused dividers initialization
> > > > > 
> > > > > V3:
> > > > > - Adjust comments
> > > > 
> > > > I don't see any comment fixed, at least not to "1", as we discussed.
> > > > Did I
> > > > miss anything?
> > > 
> > > I've added the "bits" word, so now it should sound correct.
> > 
> > Technically it's correct, but this would be third form of comments for
> > fixed bits. Let's stick to the form which is most informative ("Force
> > PLL_GPU output divider to 1"). Ideally, comment would also point to
> > gpu_clk comment for reason why, like it's done for video PLL block
> > already.
> > 
> > > > Also, please add min and max.
> > > 
> > > What is the rationale for additional limits?
> > 
> > If limits are specified in whatever form, they should be added. As I said
> > several times already, vendor code limits PLL frequency to 288 MHz minimum
> > and lists maximum. As experienced a few times before with video PLLs,
> > these are important, otherwise PLL is unstable. For example, OPP table in
> > vendor DT has two operating points lower than 288 MHz, which means it
> > would either lock up or be unstable. In such cases, vendor code actually
> > sets GPU_CLK divider to 2, but we can skip them, because GPU_CLK divider
> > will be hardcoded to 1 with this patch.
> 
> What is the rationale behind vendor's freq limitation?

You have to ask Allwinner. But usually it's a good reason, otherwise they 
wouldn't bother to write such code.

> 
> There's no min_rate field in ccu_nkmp. After I changed it to ccu_nm
> and set limits, the system started to behave unstable with a lot of
> messages in dmesg:
> 
> [   40.089091] panfrost 1800000.gpu: _generic_set_opp_clk_only: failed
> to set clock rate: -22
> [   40.097698] devfreq 1800000.gpu: dvfs failed with (-22) error

Did you remove points below 288 MHz?

> 
> From the other end I have no issues so far with the current version
> and I have a lot of other work to do.
> I think it's a good point to stop any further improvements until
> testing results show any issues with the current version.
> 
> > > CPU_PLL doesn't have these limits. I don't want to make them different.
> > 
> > Why CPU_PLL?
> 
> According to the H6 usermanual only CPU and GPU PLLs support smooth
> clock transition during DFS.

This doesn't mean they have exactly the same limitations. Anyway, I'll merge 
this one. So:

Reviewed-by: Jernej Skrabec <jernej.skrabec@gmail.com>

Best regards,
Jernej

> 
> Regards,
> Roman.
> 
> > Why not video PLL? In any case, it doesn't matter if struct looks
> > similar to some other or is unique. Only important thing is that struct
> > describes PLL as best as possible.
> > 
> > > > I also consent to R-B, which you
> > > > didn't include.
> > > 
> > > I was expecting an explicit 'review-by' line. Anyway I can add it and
> > > resend v4 if it's necessary.
> > 
> > If you at least add min and max limits, you can add following tag:
> > Reviewed-by: Jernej Skrabec <jernej.skrabec@gmail.com>
> > 
> > If you send it before Friday, it will be in 5.20.
> > 
> > Best regards,
> > Jernej
> > 
> > > Regards,
> > > Roman
> > > 
> > > > Did you resend v2 instead of v3?
> > > > 
> > > > Best regards,
> > > > Jernej
> > > > 
> > > > > ---
> > > > > 
> > > > >  drivers/clk/sunxi-ng/ccu-sun50i-h6.c | 16 +++++++++++++---
> > > > >  1 file changed, 13 insertions(+), 3 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
> > > > > b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c index
> > > > > 2ddf0a0da526f..068d1a6b2ebf3
> > > > > 100644
> > > > > --- a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
> > > > > +++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
> > > > > @@ -95,13 +95,13 @@ static struct ccu_nkmp pll_periph1_clk = {
> > > > > 
> > > > >       },
> > > > >  
> > > > >  };
> > > > > 
> > > > > +/* For GPU PLL, using an output divider for DFS causes system to
> > > > > fail
> > > > > */
> > > > > 
> > > > >  #define SUN50I_H6_PLL_GPU_REG                0x030
> > > > >  static struct ccu_nkmp pll_gpu_clk = {
> > > > >  
> > > > >       .enable         = BIT(31),
> > > > >       .lock           = BIT(28),
> > > > >       .n              = _SUNXI_CCU_MULT_MIN(8, 8, 12),
> > > > >       .m              = _SUNXI_CCU_DIV(1, 1), /* input divider */
> > > > > 
> > > > > -     .p              = _SUNXI_CCU_DIV(0, 1), /* output divider
> > > > 
> > > > */
> > > > 
> > > > >       .common         = {
> > > > >       
> > > > >               .reg            = 0x030,
> > > > >               .hw.init        = CLK_HW_INIT("pll-gpu", "osc24M",
> > > > > 
> > > > > @@ -294,9 +294,9 @@ static
> > > > > SUNXI_CCU_M_WITH_MUX_GATE(deinterlace_clk,
> > > > > "deinterlace", static SUNXI_CCU_GATE(bus_deinterlace_clk,
> > > > > "bus-deinterlace", "psi-ahb1-ahb2", 0x62c, BIT(0), 0);
> > > > > 
> > > > > +/* Keep GPU_CLK divider const to avoid DFS instability. */
> > > > > 
> > > > >  static const char * const gpu_parents[] = { "pll-gpu" };
> > > > > 
> > > > > -static SUNXI_CCU_M_WITH_MUX_GATE(gpu_clk, "gpu", gpu_parents,
> > > > > 0x670,
> > > > > -                                    0, 3,    /* M */
> > > > > +static SUNXI_CCU_MUX_WITH_GATE(gpu_clk, "gpu", gpu_parents, 0x670,
> > > > > 
> > > > >                                      24, 1,   /* mux */
> > > > >                                      BIT(31), /* gate */
> > > > >                                      CLK_SET_RATE_PARENT);
> > > > > 
> > > > > @@ -1193,6 +1193,16 @@ static int sun50i_h6_ccu_probe(struct
> > > > > platform_device *pdev) if (IS_ERR(reg))
> > > > > 
> > > > >               return PTR_ERR(reg);
> > > > > 
> > > > > +     /* Force PLL_GPU output divider bits to 0 */
> > > > > +     val = readl(reg + SUN50I_H6_PLL_GPU_REG);
> > > > > +     val &= ~BIT(0);
> > > > > +     writel(val, reg + SUN50I_H6_PLL_GPU_REG);
> > > > > +
> > > > > +     /* Force GPU_CLK divider bits to 0 */
> > > > > +     val = readl(reg + gpu_clk.common.reg);
> > > > > +     val &= ~GENMASK(3, 0);
> > > > > +     writel(val, reg + gpu_clk.common.reg);
> > > > > +
> > > > > 
> > > > >       /* Enable the lock bits on all PLLs */
> > > > >       for (i = 0; i < ARRAY_SIZE(pll_regs); i++) {
> > > > >       
> > > > >               val = readl(reg + pll_regs[i]);
> > > > > 
> > > > > --
> > > > > 2.34.1
Jernej Škrabec July 8, 2022, 4:15 p.m. UTC | #6
Dne torek, 05. julij 2022 ob 09:52:26 CEST je Roman Stratiienko napisal(a):
> Using simple bash script it was discovered that not all CCU registers
> can be safely used for DFS, e.g.:
> 
>     while true
>     do
>         devmem 0x3001030 4 0xb0003e02
>         devmem 0x3001030 4 0xb0001e02
>     done
> 
> Script above changes the GPU_PLL multiplier register value. While the
> script is running, the user should interact with the user interface.
> 
> Using this method the following results were obtained:
> | Register  | Name           | Bits  | Values | Result |
> | --        | --             | --    | --     | --     |
> | 0x3001030 | GPU_PLL.MULT   | 15..8 | 20-62  | OK     |
> | 0x3001030 | GPU_PLL.INDIV  |     1 | 0-1    | OK     |
> | 0x3001030 | GPU_PLL.OUTDIV |     0 | 0-1    | FAIL   |
> | 0x3001670 | GPU_CLK.DIV    |  3..0 | ANY    | FAIL   |
> 
> DVFS started to work seamlessly once dividers which caused the
> glitches were set to fixed values.
> 
> Signed-off-by: Roman Stratiienko <r.stratiienko@gmail.com>
> 

Applied, thanks!

Best regards,
Jernej
diff mbox series

Patch

diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
index 2ddf0a0da526f..068d1a6b2ebf3 100644
--- a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
+++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
@@ -95,13 +95,13 @@  static struct ccu_nkmp pll_periph1_clk = {
 	},
 };
 
+/* For GPU PLL, using an output divider for DFS causes system to fail */
 #define SUN50I_H6_PLL_GPU_REG		0x030
 static struct ccu_nkmp pll_gpu_clk = {
 	.enable		= BIT(31),
 	.lock		= BIT(28),
 	.n		= _SUNXI_CCU_MULT_MIN(8, 8, 12),
 	.m		= _SUNXI_CCU_DIV(1, 1), /* input divider */
-	.p		= _SUNXI_CCU_DIV(0, 1), /* output divider */
 	.common		= {
 		.reg		= 0x030,
 		.hw.init	= CLK_HW_INIT("pll-gpu", "osc24M",
@@ -294,9 +294,9 @@  static SUNXI_CCU_M_WITH_MUX_GATE(deinterlace_clk, "deinterlace",
 static SUNXI_CCU_GATE(bus_deinterlace_clk, "bus-deinterlace", "psi-ahb1-ahb2",
 		      0x62c, BIT(0), 0);
 
+/* Keep GPU_CLK divider const to avoid DFS instability. */
 static const char * const gpu_parents[] = { "pll-gpu" };
-static SUNXI_CCU_M_WITH_MUX_GATE(gpu_clk, "gpu", gpu_parents, 0x670,
-				       0, 3,	/* M */
+static SUNXI_CCU_MUX_WITH_GATE(gpu_clk, "gpu", gpu_parents, 0x670,
 				       24, 1,	/* mux */
 				       BIT(31),	/* gate */
 				       CLK_SET_RATE_PARENT);
@@ -1193,6 +1193,16 @@  static int sun50i_h6_ccu_probe(struct platform_device *pdev)
 	if (IS_ERR(reg))
 		return PTR_ERR(reg);
 
+	/* Force PLL_GPU output divider bits to 0 */
+	val = readl(reg + SUN50I_H6_PLL_GPU_REG);
+	val &= ~BIT(0);
+	writel(val, reg + SUN50I_H6_PLL_GPU_REG);
+
+	/* Force GPU_CLK divider bits to 0 */
+	val = readl(reg + gpu_clk.common.reg);
+	val &= ~GENMASK(3, 0);
+	writel(val, reg + gpu_clk.common.reg);
+
 	/* Enable the lock bits on all PLLs */
 	for (i = 0; i < ARRAY_SIZE(pll_regs); i++) {
 		val = readl(reg + pll_regs[i]);