diff mbox series

[3/6] clk: mvebu: add CPU clock driver for Armada 7K/8K

Message ID 20180922181709.13007-4-gregory.clement@bootlin.com (mailing list archive)
State New, archived
Headers show
Series Add CPU clock support for Armada 7K/8K | expand

Commit Message

Gregory CLEMENT Sept. 22, 2018, 6:17 p.m. UTC
The CPU frequency is managed at the AP level for the Armada 7K/8K. The
CPU frequency is modified by cluster: the CPUs of the same cluster have
the same frequency.

This patch adds the clock driver that will be used by CPUFreq, it is
based on the work of Omri Itach <omrii@marvell.com>.

Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
---
 drivers/clk/mvebu/Kconfig      |   3 +
 drivers/clk/mvebu/Makefile     |   1 +
 drivers/clk/mvebu/ap-cpu-clk.c | 265 +++++++++++++++++++++++++++++++++
 3 files changed, 269 insertions(+)
 create mode 100644 drivers/clk/mvebu/ap-cpu-clk.c

Comments

Stephen Boyd Oct. 17, 2018, 6:28 p.m. UTC | #1
Quoting Gregory CLEMENT (2018-09-22 11:17:06)
> diff --git a/drivers/clk/mvebu/ap-cpu-clk.c b/drivers/clk/mvebu/ap-cpu-clk.c
> new file mode 100644
> index 000000000000..1b498cfe9191
> --- /dev/null
> +++ b/drivers/clk/mvebu/ap-cpu-clk.c
> @@ -0,0 +1,265 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * Marvell Armada AP CPU Clock Controller
> + *
> + * Copyright (C) 2018 Marvell
> + *
> + * Omri Itach <omrii@marvell.com>
> + * Gregory Clement <gregory.clement@bootlin.com>
> + */
> +
> +#define pr_fmt(fmt) "ap-cpu-clk: " fmt
> +
> +#include "armada_ap_cp_helper.h"

Does this include need to come first? Preferably local includes come
after any <linux> ones.

> +#include <linux/clk-provider.h>
> +#include <linux/clk.h>
> +#include <linux/init.h>

Is this used?

> +#include <linux/mfd/syscon.h>
> +#include <linux/of.h>
> +#include <linux/of_address.h>
> +#include <linux/platform_device.h>
> +#include <linux/regmap.h>
> +
> +#define AP806_CPU_CLUSTER0             0
> +#define AP806_CPU_CLUSTER1             1
> +#define AP806_CPUS_PER_CLUSTER         2
> +#define APN806_CPU1_MASK               0x1
> +
> +#define APN806_CLUSTER_NUM_OFFSET      8
> +#define APN806_CLUSTER_NUM_MASK                (1 << APN806_CLUSTER_NUM_OFFSET)
> +
> +#define APN806_MAX_DIVIDER             32
> +
> +/* AP806 CPU DFS register mapping*/
> +#define AP806_CA72MP2_0_PLL_CR_0_REG_OFFSET            0x278
> +#define AP806_CA72MP2_0_PLL_CR_1_REG_OFFSET            0x280
> +#define AP806_CA72MP2_0_PLL_CR_2_REG_OFFSET            0x284
> +#define AP806_CA72MP2_0_PLL_SR_REG_OFFSET              0xC94
> +
> +#define AP806_CA72MP2_0_PLL_CR_CLUSTER_OFFSET          0x14
> +#define AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_OFFSET                0
> +#define AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_MASK \
> +                       (0x3f << AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_OFFSET)
> +#define AP806_PLL_CR_0_CPU_CLK_RELOAD_FORCE_OFFSET     24
> +#define AP806_PLL_CR_0_CPU_CLK_RELOAD_FORCE_MASK \
> +                       (0x1 << AP806_PLL_CR_0_CPU_CLK_RELOAD_FORCE_OFFSET)
> +#define AP806_PLL_CR_0_CPU_CLK_RELOAD_RATIO_OFFSET     16
> +#define AP806_CA72MP2_0_PLL_RATIO_STATE                        11
> +
> +#define to_clk(hw) container_of(hw, struct ap_cpu_clk, hw)
> +
> +/*
> + * struct ap806_clk: CPU cluster clock controller instance
> + * @cluster: Cluster clock controller index
> + * @clk_name: Cluster clock controller name
> + * @dev : Cluster clock device
> + * @hw: HW specific structure of Cluster clock controller
> + * @pll_cr_base: CA72MP2 Register base (Device Sample at Reset register)
> + */
> +struct ap_cpu_clk {
> +       int cluster;

unsigned?

> +       const char *clk_name;
> +       struct device *dev;
> +       struct clk_hw hw;
> +       struct regmap *pll_cr_base;
> +};
> +
> +static struct clk **cluster_clks;
> +static struct clk_onecell_data clk_data;

Can you give these some better names that are ap_cpu specific? Grepping
for clk_data will make lots of hits otherwise because they're so
generic.

> +
> +static unsigned long ap_cpu_clk_recalc_rate(struct clk_hw *hw,
> +                                           unsigned long parent_rate)
> +{
> +       struct ap_cpu_clk *clk = to_clk(hw);
> +       unsigned int cpu_clkdiv_reg = AP806_CA72MP2_0_PLL_CR_0_REG_OFFSET +
> +               (clk->cluster * AP806_CA72MP2_0_PLL_CR_CLUSTER_OFFSET);

Nitpick: This is pretty unreadable. Can you just declare the variables
up here and then do the assignment later on in the function and avoid
splitting across many lines?

> +       int cpu_clkdiv_ratio;
> +
> +       regmap_read(clk->pll_cr_base, cpu_clkdiv_reg, &cpu_clkdiv_ratio);
> +       cpu_clkdiv_ratio &= AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_MASK;
> +       cpu_clkdiv_ratio >>= AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_OFFSET;
> +
> +       return parent_rate / cpu_clkdiv_ratio;
> +}
> +
> +static int ap_cpu_clk_set_rate(struct clk_hw *hw, unsigned long rate,
> +                             unsigned long parent_rate)
> +{
> +       struct ap_cpu_clk *clk = to_clk(hw);
> +       int reg, divider = parent_rate / rate;
> +       unsigned int cpu_clkdiv_reg, cpu_force_reg, cpu_ratio_reg;
> +
> +       cpu_clkdiv_reg = AP806_CA72MP2_0_PLL_CR_0_REG_OFFSET +
> +               (clk->cluster * AP806_CA72MP2_0_PLL_CR_CLUSTER_OFFSET);
> +       cpu_force_reg = AP806_CA72MP2_0_PLL_CR_1_REG_OFFSET +
> +               (clk->cluster * AP806_CA72MP2_0_PLL_CR_CLUSTER_OFFSET);
> +       cpu_ratio_reg = AP806_CA72MP2_0_PLL_CR_2_REG_OFFSET +
> +               (clk->cluster * AP806_CA72MP2_0_PLL_CR_CLUSTER_OFFSET);
> +
> +       /* 1. Set CPU divider */
> +       regmap_update_bits(clk->pll_cr_base, cpu_clkdiv_reg,
> +                          AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_MASK, divider);
> +
> +       /* 2. Set Reload force */
> +       regmap_update_bits(clk->pll_cr_base, cpu_force_reg,
> +                          AP806_PLL_CR_0_CPU_CLK_RELOAD_FORCE_MASK,
> +                          AP806_PLL_CR_0_CPU_CLK_RELOAD_FORCE_MASK);
> +
> +       /* 3. Set Reload ratio */
> +       regmap_update_bits(clk->pll_cr_base, cpu_ratio_reg,
> +                          BIT(AP806_PLL_CR_0_CPU_CLK_RELOAD_RATIO_OFFSET),
> +                          BIT(AP806_PLL_CR_0_CPU_CLK_RELOAD_RATIO_OFFSET));
> +
> +
> +       /* 4. Wait for stabilizing CPU Clock */
> +       do {
> +               regmap_read(clk->pll_cr_base,
> +                           AP806_CA72MP2_0_PLL_SR_REG_OFFSET,
> +                           &reg);
> +       } while (!(reg & BIT(clk->cluster * AP806_CA72MP2_0_PLL_RATIO_STATE)));

Use regmap_read_poll_timeout() API here and give some large timeout
value so that we don't get stuck waiting here forever?

> +
> +       /* 5. Clear Reload ratio */

I'm not sure we really need these comments. They're just saying what the
code is doing, so they don't add much value.

> +       regmap_update_bits(clk->pll_cr_base, cpu_ratio_reg,
> +                          BIT(AP806_PLL_CR_0_CPU_CLK_RELOAD_RATIO_OFFSET), 0);
> +
> +       return 0;
> +}
> +
> +
> +static long ap_cpu_clk_round_rate(struct clk_hw *hw, unsigned long rate,
> +                                unsigned long *parent_rate)
> +{
> +       int divider = *parent_rate / rate;
> +
> +       if (divider > APN806_MAX_DIVIDER)
> +               divider = APN806_MAX_DIVIDER;

	divider = min(divider, APN806_MAX_DIVIDER);

> +
> +       return *parent_rate / divider;
> +}
> +
> +static const struct clk_ops ap_cpu_clk_ops = {
> +       .recalc_rate    = ap_cpu_clk_recalc_rate,
> +       .round_rate     = ap_cpu_clk_round_rate,
> +       .set_rate       = ap_cpu_clk_set_rate,
> +};
> +
> +static int ap_cpu_clock_probe(struct platform_device *pdev)
> +{
> +       int ret, nclusters = 0, cluster_index = 0;
> +       struct device *dev = &pdev->dev;
> +       struct device_node *dn, *np = dev->of_node;
> +       struct ap_cpu_clk *ap_cpu_clk;
> +       struct regmap *regmap;
> +
> +       regmap = syscon_node_to_regmap(np->parent);

Can we just call dev_get_remap() on pdev->dev.parent?

> +       if (IS_ERR(regmap)) {
> +               pr_err("cannot get pll_cr_base regmap\n");
> +               return PTR_ERR(regmap);
> +       }
> +
> +       /*
> +        * AP806 has 4 cpus and DFS for AP806 is controlled per
> +        * cluster (2 CPUs per cluster), cpu0 and cpu1 are fixed to
> +        * cluster0 while cpu2 and cpu3 are fixed to cluster1 whether
> +        * they are enabled or not.  Since cpu0 is the boot cpu, then
> +        * cluster0 must exist.  If cpu2 or cpu3 is enabled, cluster1
> +        * will exist and the cluster number is 2; otherwise the
> +        * cluster number is 1.
> +        */
> +       nclusters = 1;
> +       for_each_node_by_type(dn, "cpu") {

Isn't there some sort of for_each_cpu_node() API that can be used here?

> +               int cpu, err;
> +
> +               err = of_property_read_u32(dn, "reg", &cpu);
> +               if (WARN_ON(err))
> +                       return err;
> +
> +               /* If cpu2 or cpu3 is enabled */
> +               if ((cpu & APN806_CLUSTER_NUM_MASK)) {
> +                       nclusters = 2;
> +                       break;
> +               }
> +       }
> +       /*
> +        * DFS for AP806 is controlled per cluster (2 CPUs per cluster),
> +        * so allocate structs per cluster
> +        */
> +       ap_cpu_clk = devm_kcalloc(dev, nclusters, sizeof(*ap_cpu_clk),
> +                                 GFP_KERNEL);
> +       if (WARN_ON(!ap_cpu_clk))
> +               return -ENOMEM;
> +
> +       cluster_clks = devm_kcalloc(dev, nclusters, sizeof(*cluster_clks),
> +                                   GFP_KERNEL);
> +       if (WARN_ON(!cluster_clks))

Drop the WARN_ON please. If allocation fails we'll already get a large
stack trace and messages.

> +               return -ENOMEM;
> +
> +       for_each_node_by_type(dn, "cpu") {
> +               char *clk_name = "cpu-cluster-0";
> +               struct clk_init_data init;
> +               const char *parent_name;
> +               struct clk *clk, *parent;
> +               int cpu, err;
> +
> +               err = of_property_read_u32(dn, "reg", &cpu);
> +               if (WARN_ON(err))
> +                       return err;
> +
> +               cluster_index = (cpu & APN806_CLUSTER_NUM_MASK);

Nitpick: Drop useless parenthesis please.

> +               cluster_index >>= APN806_CLUSTER_NUM_OFFSET;
> +
> +               /* Initialize once for one cluster */
> +               if (cluster_clks[cluster_index])
> +                       continue;
> +
> +               parent = of_clk_get(np, cluster_index);
> +               if (IS_ERR(parent)) {
> +                       dev_err(dev, "Could not get the clock parent\n");
> +                       return -EINVAL;
> +               }
> +               parent_name =  __clk_get_name(parent);
> +               clk_name[12] += cluster_index;
> +               ap_cpu_clk[cluster_index].clk_name =
> +                       ap_cp_unique_name(dev, np->parent, clk_name);
> +               ap_cpu_clk[cluster_index].cluster = cluster_index;
> +               ap_cpu_clk[cluster_index].pll_cr_base = regmap;
> +               ap_cpu_clk[cluster_index].hw.init = &init;
> +               ap_cpu_clk[cluster_index].dev = dev;
> +
> +               init.name = ap_cpu_clk[cluster_index].clk_name;
> +               init.ops = &ap_cpu_clk_ops;
> +               init.num_parents = 1;
> +               init.parent_names = &parent_name;
> +               init.flags = CLK_GET_RATE_NOCACHE;

Can you add a comment why we need NOCACHE here? It isn't clear why this
is important.

> +
> +               clk = devm_clk_register(dev, &ap_cpu_clk[cluster_index].hw);

Please use the clk_hw based registration APIs.

> +               if (IS_ERR(clk))
> +                       return PTR_ERR(clk);
> +
> +               cluster_clks[cluster_index] = clk;
> +       }
> +
> +       clk_data.clk_num = cluster_index + 1;
> +       clk_data.clks = cluster_clks;
> +
> +       ret = of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data);

And the clk_hw based provider APIs.

> +       if (ret)
> +               dev_err(dev, "failed to register OF clock provider\n");
> +
> +       return ret;
> +}
> +
Gregory CLEMENT Nov. 15, 2018, 11:22 p.m. UTC | #2
Hi Stephen,
 
 On mer., oct. 17 2018, Stephen Boyd <sboyd@kernel.org> wrote:

> Quoting Gregory CLEMENT (2018-09-22 11:17:06)
>> diff --git a/drivers/clk/mvebu/ap-cpu-clk.c b/drivers/clk/mvebu/ap-cpu-clk.c
>> new file mode 100644
>> index 000000000000..1b498cfe9191
>> --- /dev/null
>> +++ b/drivers/clk/mvebu/ap-cpu-clk.c
>> @@ -0,0 +1,265 @@
>> +// SPDX-License-Identifier: GPL-2.0+
>> +/*
>> + * Marvell Armada AP CPU Clock Controller
>> + *
>> + * Copyright (C) 2018 Marvell
>> + *
>> + * Omri Itach <omrii@marvell.com>
>> + * Gregory Clement <gregory.clement@bootlin.com>
>> + */
>> +
>> +#define pr_fmt(fmt) "ap-cpu-clk: " fmt
>> +
>> +#include "armada_ap_cp_helper.h"
>
> Does this include need to come first? Preferably local includes come
> after any <linux> ones.

OK

>
>> +#include <linux/clk-provider.h>
>> +#include <linux/clk.h>
>> +#include <linux/init.h>
>
> Is this used?

During development but no more now, I remove it.

>
>> +#include <linux/mfd/syscon.h>
>> +#include <linux/of.h>
>> +#include <linux/of_address.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/regmap.h>
>> +
>> +#define AP806_CPU_CLUSTER0             0
>> +#define AP806_CPU_CLUSTER1             1
>> +#define AP806_CPUS_PER_CLUSTER         2
>> +#define APN806_CPU1_MASK               0x1
>> +
>> +#define APN806_CLUSTER_NUM_OFFSET      8
>> +#define APN806_CLUSTER_NUM_MASK                (1 << APN806_CLUSTER_NUM_OFFSET)
>> +
>> +#define APN806_MAX_DIVIDER             32
>> +
>> +/* AP806 CPU DFS register mapping*/
>> +#define AP806_CA72MP2_0_PLL_CR_0_REG_OFFSET            0x278
>> +#define AP806_CA72MP2_0_PLL_CR_1_REG_OFFSET            0x280
>> +#define AP806_CA72MP2_0_PLL_CR_2_REG_OFFSET            0x284
>> +#define AP806_CA72MP2_0_PLL_SR_REG_OFFSET              0xC94
>> +
>> +#define AP806_CA72MP2_0_PLL_CR_CLUSTER_OFFSET          0x14
>> +#define AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_OFFSET                0
>> +#define AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_MASK \
>> +                       (0x3f << AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_OFFSET)
>> +#define AP806_PLL_CR_0_CPU_CLK_RELOAD_FORCE_OFFSET     24
>> +#define AP806_PLL_CR_0_CPU_CLK_RELOAD_FORCE_MASK \
>> +                       (0x1 << AP806_PLL_CR_0_CPU_CLK_RELOAD_FORCE_OFFSET)
>> +#define AP806_PLL_CR_0_CPU_CLK_RELOAD_RATIO_OFFSET     16
>> +#define AP806_CA72MP2_0_PLL_RATIO_STATE                        11
>> +
>> +#define to_clk(hw) container_of(hw, struct ap_cpu_clk, hw)
>> +
>> +/*
>> + * struct ap806_clk: CPU cluster clock controller instance
>> + * @cluster: Cluster clock controller index
>> + * @clk_name: Cluster clock controller name
>> + * @dev : Cluster clock device
>> + * @hw: HW specific structure of Cluster clock controller
>> + * @pll_cr_base: CA72MP2 Register base (Device Sample at Reset register)
>> + */
>> +struct ap_cpu_clk {
>> +       int cluster;
>
> unsigned?

I don't expect to have so many clusters that a signed int would be too
short :)

Actually I used int to let the compiler choose what ever he wants.

But if you really want I can use a unsigned int

>
>> +       const char *clk_name;
>> +       struct device *dev;
>> +       struct clk_hw hw;
>> +       struct regmap *pll_cr_base;
>> +};
>> +
>> +static struct clk **cluster_clks;
>> +static struct clk_onecell_data clk_data;
>
> Can you give these some better names that are ap_cpu specific? Grepping
> for clk_data will make lots of hits otherwise because they're so
> generic.

OK I will try to be more creative ;)

>
>> +
>> +static unsigned long ap_cpu_clk_recalc_rate(struct clk_hw *hw,
>> +                                           unsigned long parent_rate)
>> +{
>> +       struct ap_cpu_clk *clk = to_clk(hw);
>> +       unsigned int cpu_clkdiv_reg = AP806_CA72MP2_0_PLL_CR_0_REG_OFFSET +
>> +               (clk->cluster * AP806_CA72MP2_0_PLL_CR_CLUSTER_OFFSET);
>
> Nitpick: This is pretty unreadable. Can you just declare the variables
> up here and then do the assignment later on in the function and avoid
> splitting across many lines?

OK

>
>> +       int cpu_clkdiv_ratio;
>> +
>> +       regmap_read(clk->pll_cr_base, cpu_clkdiv_reg, &cpu_clkdiv_ratio);
>> +       cpu_clkdiv_ratio &= AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_MASK;
>> +       cpu_clkdiv_ratio >>= AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_OFFSET;
>> +
>> +       return parent_rate / cpu_clkdiv_ratio;
>> +}
>> +
>> +static int ap_cpu_clk_set_rate(struct clk_hw *hw, unsigned long rate,
>> +                             unsigned long parent_rate)
>> +{
>> +       struct ap_cpu_clk *clk = to_clk(hw);
>> +       int reg, divider = parent_rate / rate;
>> +       unsigned int cpu_clkdiv_reg, cpu_force_reg, cpu_ratio_reg;
>> +
>> +       cpu_clkdiv_reg = AP806_CA72MP2_0_PLL_CR_0_REG_OFFSET +
>> +               (clk->cluster * AP806_CA72MP2_0_PLL_CR_CLUSTER_OFFSET);
>> +       cpu_force_reg = AP806_CA72MP2_0_PLL_CR_1_REG_OFFSET +
>> +               (clk->cluster * AP806_CA72MP2_0_PLL_CR_CLUSTER_OFFSET);
>> +       cpu_ratio_reg = AP806_CA72MP2_0_PLL_CR_2_REG_OFFSET +
>> +               (clk->cluster * AP806_CA72MP2_0_PLL_CR_CLUSTER_OFFSET);
>> +
>> +       /* 1. Set CPU divider */
>> +       regmap_update_bits(clk->pll_cr_base, cpu_clkdiv_reg,
>> +                          AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_MASK, divider);
>> +
>> +       /* 2. Set Reload force */
>> +       regmap_update_bits(clk->pll_cr_base, cpu_force_reg,
>> +                          AP806_PLL_CR_0_CPU_CLK_RELOAD_FORCE_MASK,
>> +                          AP806_PLL_CR_0_CPU_CLK_RELOAD_FORCE_MASK);
>> +
>> +       /* 3. Set Reload ratio */
>> +       regmap_update_bits(clk->pll_cr_base, cpu_ratio_reg,
>> +                          BIT(AP806_PLL_CR_0_CPU_CLK_RELOAD_RATIO_OFFSET),
>> +                          BIT(AP806_PLL_CR_0_CPU_CLK_RELOAD_RATIO_OFFSET));
>> +
>> +
>> +       /* 4. Wait for stabilizing CPU Clock */
>> +       do {
>> +               regmap_read(clk->pll_cr_base,
>> +                           AP806_CA72MP2_0_PLL_SR_REG_OFFSET,
>> +                           &reg);
>> +       } while (!(reg & BIT(clk->cluster * AP806_CA72MP2_0_PLL_RATIO_STATE)));
>
> Use regmap_read_poll_timeout() API here and give some large timeout
> value so that we don't get stuck waiting here forever?

Indeed regmap_read_poll_timeout() seems a nice improvement

>
>> +
>> +       /* 5. Clear Reload ratio */
>
> I'm not sure we really need these comments. They're just saying what the
> code is doing, so they don't add much value.

Actually, these 5 steps directly come from the datasheet.

>
>> +       regmap_update_bits(clk->pll_cr_base, cpu_ratio_reg,
>> +                          BIT(AP806_PLL_CR_0_CPU_CLK_RELOAD_RATIO_OFFSET), 0);
>> +
>> +       return 0;
>> +}
>> +
>> +
>> +static long ap_cpu_clk_round_rate(struct clk_hw *hw, unsigned long rate,
>> +                                unsigned long *parent_rate)
>> +{
>> +       int divider = *parent_rate / rate;
>> +
>> +       if (divider > APN806_MAX_DIVIDER)
>> +               divider = APN806_MAX_DIVIDER;
>
> 	divider = min(divider, APN806_MAX_DIVIDER);

OK
>
>> +
>> +       return *parent_rate / divider;
>> +}
>> +
>> +static const struct clk_ops ap_cpu_clk_ops = {
>> +       .recalc_rate    = ap_cpu_clk_recalc_rate,
>> +       .round_rate     = ap_cpu_clk_round_rate,
>> +       .set_rate       = ap_cpu_clk_set_rate,
>> +};
>> +
>> +static int ap_cpu_clock_probe(struct platform_device *pdev)
>> +{
>> +       int ret, nclusters = 0, cluster_index = 0;
>> +       struct device *dev = &pdev->dev;
>> +       struct device_node *dn, *np = dev->of_node;
>> +       struct ap_cpu_clk *ap_cpu_clk;
>> +       struct regmap *regmap;
>> +
>> +       regmap = syscon_node_to_regmap(np->parent);
>
> Can we just call dev_get_remap() on pdev->dev.parent?

we could do regmap = dev_get_regmap(pdev->dev.parent, NULL); instead of
this line. But is it really better?

>
>> +       if (IS_ERR(regmap)) {
>> +               pr_err("cannot get pll_cr_base regmap\n");
>> +               return PTR_ERR(regmap);
>> +       }
>> +
>> +       /*
>> +        * AP806 has 4 cpus and DFS for AP806 is controlled per
>> +        * cluster (2 CPUs per cluster), cpu0 and cpu1 are fixed to
>> +        * cluster0 while cpu2 and cpu3 are fixed to cluster1 whether
>> +        * they are enabled or not.  Since cpu0 is the boot cpu, then
>> +        * cluster0 must exist.  If cpu2 or cpu3 is enabled, cluster1
>> +        * will exist and the cluster number is 2; otherwise the
>> +        * cluster number is 1.
>> +        */
>> +       nclusters = 1;
>> +       for_each_node_by_type(dn, "cpu") {
>
> Isn't there some sort of for_each_cpu_node() API that can be used
> here?

Not when I wrote the code but since v4.20-rc1 we have
for_each_of_cpu_node(). So I am going to use it now.

>
>> +               int cpu, err;
>> +
>> +               err = of_property_read_u32(dn, "reg", &cpu);
>> +               if (WARN_ON(err))
>> +                       return err;
>> +
>> +               /* If cpu2 or cpu3 is enabled */
>> +               if ((cpu & APN806_CLUSTER_NUM_MASK)) {
>> +                       nclusters = 2;
>> +                       break;
>> +               }
>> +       }
>> +       /*
>> +        * DFS for AP806 is controlled per cluster (2 CPUs per cluster),
>> +        * so allocate structs per cluster
>> +        */
>> +       ap_cpu_clk = devm_kcalloc(dev, nclusters, sizeof(*ap_cpu_clk),
>> +                                 GFP_KERNEL);
>> +       if (WARN_ON(!ap_cpu_clk))
>> +               return -ENOMEM;
>> +
>> +       cluster_clks = devm_kcalloc(dev, nclusters, sizeof(*cluster_clks),
>> +                                   GFP_KERNEL);
>> +       if (WARN_ON(!cluster_clks))
>
> Drop the WARN_ON please. If allocation fails we'll already get a large
> stack trace and messages.

OK

>
>> +               return -ENOMEM;
>> +
>> +       for_each_node_by_type(dn, "cpu") {
>> +               char *clk_name = "cpu-cluster-0";
>> +               struct clk_init_data init;
>> +               const char *parent_name;
>> +               struct clk *clk, *parent;
>> +               int cpu, err;
>> +
>> +               err = of_property_read_u32(dn, "reg", &cpu);
>> +               if (WARN_ON(err))
>> +                       return err;
>> +
>> +               cluster_index = (cpu & APN806_CLUSTER_NUM_MASK);
>
> Nitpick: Drop useless parenthesis please.

OK
>
>> +               cluster_index >>= APN806_CLUSTER_NUM_OFFSET;
>> +
>> +               /* Initialize once for one cluster */
>> +               if (cluster_clks[cluster_index])
>> +                       continue;
>> +
>> +               parent = of_clk_get(np, cluster_index);
>> +               if (IS_ERR(parent)) {
>> +                       dev_err(dev, "Could not get the clock parent\n");
>> +                       return -EINVAL;
>> +               }
>> +               parent_name =  __clk_get_name(parent);
>> +               clk_name[12] += cluster_index;
>> +               ap_cpu_clk[cluster_index].clk_name =
>> +                       ap_cp_unique_name(dev, np->parent, clk_name);
>> +               ap_cpu_clk[cluster_index].cluster = cluster_index;
>> +               ap_cpu_clk[cluster_index].pll_cr_base = regmap;
>> +               ap_cpu_clk[cluster_index].hw.init = &init;
>> +               ap_cpu_clk[cluster_index].dev = dev;
>> +
>> +               init.name = ap_cpu_clk[cluster_index].clk_name;
>> +               init.ops = &ap_cpu_clk_ops;
>> +               init.num_parents = 1;
>> +               init.parent_names = &parent_name;
>> +               init.flags = CLK_GET_RATE_NOCACHE;
>
> Can you add a comment why we need NOCACHE here? It isn't clear why this
> is important.

I think there was the idea that the clock rate could be modified by
something else that the clk driver, but now I am not sure it is the
case. I will check it and either add a comment or just remove it.

>
>> +
>> +               clk = devm_clk_register(dev, &ap_cpu_clk[cluster_index].hw);
>
> Please use the clk_hw based registration APIs.

OK

>
>> +               if (IS_ERR(clk))
>> +                       return PTR_ERR(clk);
>> +
>> +               cluster_clks[cluster_index] = clk;
>> +       }
>> +
>> +       clk_data.clk_num = cluster_index + 1;
>> +       clk_data.clks = cluster_clks;
>> +
>> +       ret = of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data);
>
> And the clk_hw based provider APIs.

OK
>
>> +       if (ret)
>> +               dev_err(dev, "failed to register OF clock provider\n");
>> +
>> +       return ret;
>> +}
>> +

Thanks,

Gregory
Gregory CLEMENT Nov. 27, 2018, 4:27 p.m. UTC | #3
Hi Stephen,
 
 On ven., nov. 16 2018, Gregory CLEMENT <gregory.clement@bootlin.com> wrote:

>>> +static int ap_cpu_clock_probe(struct platform_device *pdev)
>>> +{
>>> +       int ret, nclusters = 0, cluster_index = 0;
>>> +       struct device *dev = &pdev->dev;
>>> +       struct device_node *dn, *np = dev->of_node;
>>> +       struct ap_cpu_clk *ap_cpu_clk;
>>> +       struct regmap *regmap;
>>> +
>>> +       regmap = syscon_node_to_regmap(np->parent);
>>
>> Can we just call dev_get_remap() on pdev->dev.parent?
>
> we could do regmap = dev_get_regmap(pdev->dev.parent, NULL); instead of
> this line. But is it really better?

Actually we can't, because we really depend on a syscon and at this
moment there is no regmap. It is the syscon_node_to_regmap function which
creates this regmap when needed.

Gregory
Stephen Boyd Nov. 28, 2018, 9:58 p.m. UTC | #4
Quoting Gregory CLEMENT (2018-11-15 15:22:59)
>  On mer., oct. 17 2018, Stephen Boyd <sboyd@kernel.org> wrote:
> 
> > Quoting Gregory CLEMENT (2018-09-22 11:17:06)
> >> + * @cluster: Cluster clock controller index
> >> + * @clk_name: Cluster clock controller name
> >> + * @dev : Cluster clock device
> >> + * @hw: HW specific structure of Cluster clock controller
> >> + * @pll_cr_base: CA72MP2 Register base (Device Sample at Reset register)
> >> + */
> >> +struct ap_cpu_clk {
> >> +       int cluster;
> >
> > unsigned?
> 
> I don't expect to have so many clusters that a signed int would be too
> short :)
> 
> Actually I used int to let the compiler choose what ever he wants.
> 
> But if you really want I can use a unsigned int

It's not just about opening up a wider range of values. It's also about
being more explicit with the type by indicating a negative value is
impossible/doesn't make sense for the code. So yes please, change it to
unsigned int.

> 
> >
> >> +       const char *clk_name;
> >> +       struct device *dev;
> >> +       struct clk_hw hw;
> >> +       struct regmap *pll_cr_base;
> >> +};
> >> +
> >> +static struct clk **cluster_clks;
> >> +static struct clk_onecell_data clk_data;
> >
> > Can you give these some better names that are ap_cpu specific? Grepping
> > for clk_data will make lots of hits otherwise because they're so
> > generic.
> 
> OK I will try to be more creative ;)

Thanks.

> >> +                          BIT(AP806_PLL_CR_0_CPU_CLK_RELOAD_RATIO_OFFSET),
> >> +                          BIT(AP806_PLL_CR_0_CPU_CLK_RELOAD_RATIO_OFFSET));
> >> +
> >> +
> >> +       /* 4. Wait for stabilizing CPU Clock */
> >> +       do {
> >> +               regmap_read(clk->pll_cr_base,
> >> +                           AP806_CA72MP2_0_PLL_SR_REG_OFFSET,
> >> +                           &reg);
> >> +       } while (!(reg & BIT(clk->cluster * AP806_CA72MP2_0_PLL_RATIO_STATE)));
> >
> > Use regmap_read_poll_timeout() API here and give some large timeout
> > value so that we don't get stuck waiting here forever?
> 
> Indeed regmap_read_poll_timeout() seems a nice improvement
> 
> >
> >> +
> >> +       /* 5. Clear Reload ratio */
> >
> > I'm not sure we really need these comments. They're just saying what the
> > code is doing, so they don't add much value.
> 
> Actually, these 5 steps directly come from the datasheet.

Ok, still doesn't mean we need to copy the datasheet into code comments
though.

> 
> >
> >> +       regmap_update_bits(clk->pll_cr_base, cpu_ratio_reg,
> >> +                          BIT(AP806_PLL_CR_0_CPU_CLK_RELOAD_RATIO_OFFSET), 0);
> >> +
> >> +       return 0;
> >> +}
> >> +
> >> +
> >> +static long ap_cpu_clk_round_rate(struct clk_hw *hw, unsigned long rate,
> >> +                                unsigned long *parent_rate)
> >> +{
> >> +       int divider = *parent_rate / rate;
> >> +
> >> +       if (divider > APN806_MAX_DIVIDER)
> >> +               divider = APN806_MAX_DIVIDER;
> >
> >       divider = min(divider, APN806_MAX_DIVIDER);
> 
> OK
> >
> >> +
> >> +       return *parent_rate / divider;
> >> +}
> >> +
> >> +static const struct clk_ops ap_cpu_clk_ops = {
> >> +       .recalc_rate    = ap_cpu_clk_recalc_rate,
> >> +       .round_rate     = ap_cpu_clk_round_rate,
> >> +       .set_rate       = ap_cpu_clk_set_rate,
> >> +};
> >> +
> >> +static int ap_cpu_clock_probe(struct platform_device *pdev)
> >> +{
> >> +       int ret, nclusters = 0, cluster_index = 0;
> >> +       struct device *dev = &pdev->dev;
> >> +       struct device_node *dn, *np = dev->of_node;
> >> +       struct ap_cpu_clk *ap_cpu_clk;
> >> +       struct regmap *regmap;
> >> +
> >> +       regmap = syscon_node_to_regmap(np->parent);
> >
> > Can we just call dev_get_remap() on pdev->dev.parent?
> 
> we could do regmap = dev_get_regmap(pdev->dev.parent, NULL); instead of
> this line. But is it really better?

It allows us to ignore 'syscon' in this driver and drop the include of
that header file? 

> 
> >
> >> +       if (IS_ERR(regmap)) {
> >> +               pr_err("cannot get pll_cr_base regmap\n");
> >> +               return PTR_ERR(regmap);
> >> +       }
> >> +
> >> +       /*
> >> +        * AP806 has 4 cpus and DFS for AP806 is controlled per
> >> +        * cluster (2 CPUs per cluster), cpu0 and cpu1 are fixed to
> >> +        * cluster0 while cpu2 and cpu3 are fixed to cluster1 whether
> >> +        * they are enabled or not.  Since cpu0 is the boot cpu, then
> >> +        * cluster0 must exist.  If cpu2 or cpu3 is enabled, cluster1
> >> +        * will exist and the cluster number is 2; otherwise the
> >> +        * cluster number is 1.
> >> +        */
> >> +       nclusters = 1;
> >> +       for_each_node_by_type(dn, "cpu") {
> >
> > Isn't there some sort of for_each_cpu_node() API that can be used
> > here?
> 
> Not when I wrote the code but since v4.20-rc1 we have
> for_each_of_cpu_node(). So I am going to use it now.

Awesome!

> >
> >> +               cluster_index >>= APN806_CLUSTER_NUM_OFFSET;
> >> +
> >> +               /* Initialize once for one cluster */
> >> +               if (cluster_clks[cluster_index])
> >> +                       continue;
> >> +
> >> +               parent = of_clk_get(np, cluster_index);
> >> +               if (IS_ERR(parent)) {
> >> +                       dev_err(dev, "Could not get the clock parent\n");
> >> +                       return -EINVAL;
> >> +               }
> >> +               parent_name =  __clk_get_name(parent);
> >> +               clk_name[12] += cluster_index;
> >> +               ap_cpu_clk[cluster_index].clk_name =
> >> +                       ap_cp_unique_name(dev, np->parent, clk_name);
> >> +               ap_cpu_clk[cluster_index].cluster = cluster_index;
> >> +               ap_cpu_clk[cluster_index].pll_cr_base = regmap;
> >> +               ap_cpu_clk[cluster_index].hw.init = &init;
> >> +               ap_cpu_clk[cluster_index].dev = dev;
> >> +
> >> +               init.name = ap_cpu_clk[cluster_index].clk_name;
> >> +               init.ops = &ap_cpu_clk_ops;
> >> +               init.num_parents = 1;
> >> +               init.parent_names = &parent_name;
> >> +               init.flags = CLK_GET_RATE_NOCACHE;
> >
> > Can you add a comment why we need NOCACHE here? It isn't clear why this
> > is important.
> 
> I think there was the idea that the clock rate could be modified by
> something else that the clk driver, but now I am not sure it is the
> case. I will check it and either add a comment or just remove it.

Ok.
Stephen Boyd Nov. 28, 2018, 9:58 p.m. UTC | #5
Quoting Gregory CLEMENT (2018-11-27 08:27:40)
> Hi Stephen,
>  
>  On ven., nov. 16 2018, Gregory CLEMENT <gregory.clement@bootlin.com> wrote:
> 
> >>> +static int ap_cpu_clock_probe(struct platform_device *pdev)
> >>> +{
> >>> +       int ret, nclusters = 0, cluster_index = 0;
> >>> +       struct device *dev = &pdev->dev;
> >>> +       struct device_node *dn, *np = dev->of_node;
> >>> +       struct ap_cpu_clk *ap_cpu_clk;
> >>> +       struct regmap *regmap;
> >>> +
> >>> +       regmap = syscon_node_to_regmap(np->parent);
> >>
> >> Can we just call dev_get_remap() on pdev->dev.parent?
> >
> > we could do regmap = dev_get_regmap(pdev->dev.parent, NULL); instead of
> > this line. But is it really better?
> 
> Actually we can't, because we really depend on a syscon and at this
> moment there is no regmap. It is the syscon_node_to_regmap function which
> creates this regmap when needed.
> 

Ah ok. Nevermind then.
diff mbox series

Patch

diff --git a/drivers/clk/mvebu/Kconfig b/drivers/clk/mvebu/Kconfig
index 5492fae3f0ab..9d0b2f7cee21 100644
--- a/drivers/clk/mvebu/Kconfig
+++ b/drivers/clk/mvebu/Kconfig
@@ -39,6 +39,9 @@  config ARMADA_AP806_SYSCON
 	bool
 	select ARMADA_AP_CP_HELPER
 
+config ARMADA_AP_CPU_CLK
+	bool
+
 config ARMADA_CP110_SYSCON
 	bool
 	select ARMADA_AP_CP_HELPER
diff --git a/drivers/clk/mvebu/Makefile b/drivers/clk/mvebu/Makefile
index 6638ad962ec1..04464cef0f06 100644
--- a/drivers/clk/mvebu/Makefile
+++ b/drivers/clk/mvebu/Makefile
@@ -13,6 +13,7 @@  obj-$(CONFIG_ARMADA_37XX_CLK)	+= armada-37xx-tbg.o
 obj-$(CONFIG_ARMADA_37XX_CLK)	+= armada-37xx-periph.o
 obj-$(CONFIG_ARMADA_XP_CLK)	+= armada-xp.o mv98dx3236.o
 obj-$(CONFIG_ARMADA_AP806_SYSCON) += ap806-system-controller.o
+obj-$(CONFIG_ARMADA_AP_CPU_CLK) += ap-cpu-clk.o
 obj-$(CONFIG_ARMADA_CP110_SYSCON) += cp110-system-controller.o
 obj-$(CONFIG_DOVE_CLK)		+= dove.o dove-divider.o
 obj-$(CONFIG_KIRKWOOD_CLK)	+= kirkwood.o
diff --git a/drivers/clk/mvebu/ap-cpu-clk.c b/drivers/clk/mvebu/ap-cpu-clk.c
new file mode 100644
index 000000000000..1b498cfe9191
--- /dev/null
+++ b/drivers/clk/mvebu/ap-cpu-clk.c
@@ -0,0 +1,265 @@ 
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Marvell Armada AP CPU Clock Controller
+ *
+ * Copyright (C) 2018 Marvell
+ *
+ * Omri Itach <omrii@marvell.com>
+ * Gregory Clement <gregory.clement@bootlin.com>
+ */
+
+#define pr_fmt(fmt) "ap-cpu-clk: " fmt
+
+#include "armada_ap_cp_helper.h"
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/init.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#define AP806_CPU_CLUSTER0		0
+#define AP806_CPU_CLUSTER1		1
+#define AP806_CPUS_PER_CLUSTER		2
+#define APN806_CPU1_MASK		0x1
+
+#define APN806_CLUSTER_NUM_OFFSET	8
+#define APN806_CLUSTER_NUM_MASK		(1 << APN806_CLUSTER_NUM_OFFSET)
+
+#define APN806_MAX_DIVIDER		32
+
+/* AP806 CPU DFS register mapping*/
+#define AP806_CA72MP2_0_PLL_CR_0_REG_OFFSET		0x278
+#define AP806_CA72MP2_0_PLL_CR_1_REG_OFFSET		0x280
+#define AP806_CA72MP2_0_PLL_CR_2_REG_OFFSET		0x284
+#define AP806_CA72MP2_0_PLL_SR_REG_OFFSET		0xC94
+
+#define AP806_CA72MP2_0_PLL_CR_CLUSTER_OFFSET		0x14
+#define AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_OFFSET		0
+#define AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_MASK \
+			(0x3f << AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_OFFSET)
+#define AP806_PLL_CR_0_CPU_CLK_RELOAD_FORCE_OFFSET	24
+#define AP806_PLL_CR_0_CPU_CLK_RELOAD_FORCE_MASK \
+			(0x1 << AP806_PLL_CR_0_CPU_CLK_RELOAD_FORCE_OFFSET)
+#define AP806_PLL_CR_0_CPU_CLK_RELOAD_RATIO_OFFSET	16
+#define AP806_CA72MP2_0_PLL_RATIO_STATE			11
+
+#define to_clk(hw) container_of(hw, struct ap_cpu_clk, hw)
+
+/*
+ * struct ap806_clk: CPU cluster clock controller instance
+ * @cluster: Cluster clock controller index
+ * @clk_name: Cluster clock controller name
+ * @dev : Cluster clock device
+ * @hw: HW specific structure of Cluster clock controller
+ * @pll_cr_base: CA72MP2 Register base (Device Sample at Reset register)
+ */
+struct ap_cpu_clk {
+	int cluster;
+	const char *clk_name;
+	struct device *dev;
+	struct clk_hw hw;
+	struct regmap *pll_cr_base;
+};
+
+static struct clk **cluster_clks;
+static struct clk_onecell_data clk_data;
+
+static unsigned long ap_cpu_clk_recalc_rate(struct clk_hw *hw,
+					    unsigned long parent_rate)
+{
+	struct ap_cpu_clk *clk = to_clk(hw);
+	unsigned int cpu_clkdiv_reg = AP806_CA72MP2_0_PLL_CR_0_REG_OFFSET +
+		(clk->cluster * AP806_CA72MP2_0_PLL_CR_CLUSTER_OFFSET);
+	int cpu_clkdiv_ratio;
+
+	regmap_read(clk->pll_cr_base, cpu_clkdiv_reg, &cpu_clkdiv_ratio);
+	cpu_clkdiv_ratio &= AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_MASK;
+	cpu_clkdiv_ratio >>= AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_OFFSET;
+
+	return parent_rate / cpu_clkdiv_ratio;
+}
+
+static int ap_cpu_clk_set_rate(struct clk_hw *hw, unsigned long rate,
+			      unsigned long parent_rate)
+{
+	struct ap_cpu_clk *clk = to_clk(hw);
+	int reg, divider = parent_rate / rate;
+	unsigned int cpu_clkdiv_reg, cpu_force_reg, cpu_ratio_reg;
+
+	cpu_clkdiv_reg = AP806_CA72MP2_0_PLL_CR_0_REG_OFFSET +
+		(clk->cluster * AP806_CA72MP2_0_PLL_CR_CLUSTER_OFFSET);
+	cpu_force_reg = AP806_CA72MP2_0_PLL_CR_1_REG_OFFSET +
+		(clk->cluster * AP806_CA72MP2_0_PLL_CR_CLUSTER_OFFSET);
+	cpu_ratio_reg = AP806_CA72MP2_0_PLL_CR_2_REG_OFFSET +
+		(clk->cluster * AP806_CA72MP2_0_PLL_CR_CLUSTER_OFFSET);
+
+	/* 1. Set CPU divider */
+	regmap_update_bits(clk->pll_cr_base, cpu_clkdiv_reg,
+			   AP806_PLL_CR_0_CPU_CLK_DIV_RATIO_MASK, divider);
+
+	/* 2. Set Reload force */
+	regmap_update_bits(clk->pll_cr_base, cpu_force_reg,
+			   AP806_PLL_CR_0_CPU_CLK_RELOAD_FORCE_MASK,
+			   AP806_PLL_CR_0_CPU_CLK_RELOAD_FORCE_MASK);
+
+	/* 3. Set Reload ratio */
+	regmap_update_bits(clk->pll_cr_base, cpu_ratio_reg,
+			   BIT(AP806_PLL_CR_0_CPU_CLK_RELOAD_RATIO_OFFSET),
+			   BIT(AP806_PLL_CR_0_CPU_CLK_RELOAD_RATIO_OFFSET));
+
+
+	/* 4. Wait for stabilizing CPU Clock */
+	do {
+		regmap_read(clk->pll_cr_base,
+			    AP806_CA72MP2_0_PLL_SR_REG_OFFSET,
+			    &reg);
+	} while (!(reg & BIT(clk->cluster * AP806_CA72MP2_0_PLL_RATIO_STATE)));
+
+	/* 5. Clear Reload ratio */
+	regmap_update_bits(clk->pll_cr_base, cpu_ratio_reg,
+			   BIT(AP806_PLL_CR_0_CPU_CLK_RELOAD_RATIO_OFFSET), 0);
+
+	return 0;
+}
+
+
+static long ap_cpu_clk_round_rate(struct clk_hw *hw, unsigned long rate,
+				 unsigned long *parent_rate)
+{
+	int divider = *parent_rate / rate;
+
+	if (divider > APN806_MAX_DIVIDER)
+		divider = APN806_MAX_DIVIDER;
+
+	return *parent_rate / divider;
+}
+
+static const struct clk_ops ap_cpu_clk_ops = {
+	.recalc_rate	= ap_cpu_clk_recalc_rate,
+	.round_rate	= ap_cpu_clk_round_rate,
+	.set_rate	= ap_cpu_clk_set_rate,
+};
+
+static int ap_cpu_clock_probe(struct platform_device *pdev)
+{
+	int ret, nclusters = 0, cluster_index = 0;
+	struct device *dev = &pdev->dev;
+	struct device_node *dn, *np = dev->of_node;
+	struct ap_cpu_clk *ap_cpu_clk;
+	struct regmap *regmap;
+
+	regmap = syscon_node_to_regmap(np->parent);
+	if (IS_ERR(regmap)) {
+		pr_err("cannot get pll_cr_base regmap\n");
+		return PTR_ERR(regmap);
+	}
+
+	/*
+	 * AP806 has 4 cpus and DFS for AP806 is controlled per
+	 * cluster (2 CPUs per cluster), cpu0 and cpu1 are fixed to
+	 * cluster0 while cpu2 and cpu3 are fixed to cluster1 whether
+	 * they are enabled or not.  Since cpu0 is the boot cpu, then
+	 * cluster0 must exist.  If cpu2 or cpu3 is enabled, cluster1
+	 * will exist and the cluster number is 2; otherwise the
+	 * cluster number is 1.
+	 */
+	nclusters = 1;
+	for_each_node_by_type(dn, "cpu") {
+		int cpu, err;
+
+		err = of_property_read_u32(dn, "reg", &cpu);
+		if (WARN_ON(err))
+			return err;
+
+		/* If cpu2 or cpu3 is enabled */
+		if ((cpu & APN806_CLUSTER_NUM_MASK)) {
+			nclusters = 2;
+			break;
+		}
+	}
+	/*
+	 * DFS for AP806 is controlled per cluster (2 CPUs per cluster),
+	 * so allocate structs per cluster
+	 */
+	ap_cpu_clk = devm_kcalloc(dev, nclusters, sizeof(*ap_cpu_clk),
+				  GFP_KERNEL);
+	if (WARN_ON(!ap_cpu_clk))
+		return -ENOMEM;
+
+	cluster_clks = devm_kcalloc(dev, nclusters, sizeof(*cluster_clks),
+				    GFP_KERNEL);
+	if (WARN_ON(!cluster_clks))
+		return -ENOMEM;
+
+	for_each_node_by_type(dn, "cpu") {
+		char *clk_name = "cpu-cluster-0";
+		struct clk_init_data init;
+		const char *parent_name;
+		struct clk *clk, *parent;
+		int cpu, err;
+
+		err = of_property_read_u32(dn, "reg", &cpu);
+		if (WARN_ON(err))
+			return err;
+
+		cluster_index = (cpu & APN806_CLUSTER_NUM_MASK);
+		cluster_index >>= APN806_CLUSTER_NUM_OFFSET;
+
+		/* Initialize once for one cluster */
+		if (cluster_clks[cluster_index])
+			continue;
+
+		parent = of_clk_get(np, cluster_index);
+		if (IS_ERR(parent)) {
+			dev_err(dev, "Could not get the clock parent\n");
+			return -EINVAL;
+		}
+		parent_name =  __clk_get_name(parent);
+		clk_name[12] += cluster_index;
+		ap_cpu_clk[cluster_index].clk_name =
+			ap_cp_unique_name(dev, np->parent, clk_name);
+		ap_cpu_clk[cluster_index].cluster = cluster_index;
+		ap_cpu_clk[cluster_index].pll_cr_base = regmap;
+		ap_cpu_clk[cluster_index].hw.init = &init;
+		ap_cpu_clk[cluster_index].dev = dev;
+
+		init.name = ap_cpu_clk[cluster_index].clk_name;
+		init.ops = &ap_cpu_clk_ops;
+		init.num_parents = 1;
+		init.parent_names = &parent_name;
+		init.flags = CLK_GET_RATE_NOCACHE;
+
+		clk = devm_clk_register(dev, &ap_cpu_clk[cluster_index].hw);
+		if (IS_ERR(clk))
+			return PTR_ERR(clk);
+
+		cluster_clks[cluster_index] = clk;
+	}
+
+	clk_data.clk_num = cluster_index + 1;
+	clk_data.clks = cluster_clks;
+
+	ret = of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data);
+	if (ret)
+		dev_err(dev, "failed to register OF clock provider\n");
+
+	return ret;
+}
+
+static const struct of_device_id ap_cpu_clock_of_match[] = {
+	{ .compatible = "marvell,ap806-cpu-clock", },
+	{ }
+};
+
+static struct platform_driver ap_cpu_clock_driver = {
+	.probe = ap_cpu_clock_probe,
+	.driver		= {
+		.name	= "marvell-ap-cpu-clock",
+		.of_match_table = ap_cpu_clock_of_match,
+		.suppress_bind_attrs = true,
+	},
+};
+builtin_platform_driver(ap_cpu_clock_driver);