
[v6,1/4] clk: Add support for runtime PM

Message ID 1490182543-28550-2-git-send-email-m.szyprowski@samsung.com (mailing list archive)
State Not Applicable, archived

Commit Message

Marek Szyprowski March 22, 2017, 11:35 a.m. UTC
Registers for some clocks might be located in an SoC area that belongs to a
power domain. To enable access to those registers, the respective domain has to
be turned on. Additionally, registers of such clocks will usually lose their
contents when the power domain is turned off, so the clock controller driver
might additionally need to save and restore them.

This patch adds basic infrastructure to the clock core to allow implementing
drivers for such clocks under power domains. A clock provider can supply a
struct device pointer, which is then used by the clock core for tracking and
managing the clock controller's runtime PM state. Each clk_prepare() operation
will first call pm_runtime_get_sync() on the supplied device, while
clk_unprepare() will call pm_runtime_put_sync() at the end.

Additional calls to the pm_runtime_get/put functions are required to ensure
that any register access (like calculating/changing clock rates and
unpreparing/disabling unused clocks on boot) is done with the clock controller
in the runtime-resumed state.

To register a clock controller that makes use of this feature, one has to
(see the sketch below):
1. Provide a struct device to the core when registering the provider.
2. Enable runtime PM for that device before registering clocks.
3. Make sure that the runtime PM status of the controller device reflects
   the HW state.
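
A minimal sketch of these provider-side steps, assuming a hypothetical
platform driver (foo_clk_probe() and foo_gate_hw are illustrative names, and
how the initial runtime PM status is set depends on how the power domain is
brought up):

#include <linux/clk-provider.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

/* foo_gate_hw: a hypothetical, already initialized clk_hw */
extern struct clk_hw foo_gate_hw;

static int foo_clk_probe(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	int ret;

	/* 3. report the initial HW state; this assumes the power domain is
	 *    on while probe runs */
	pm_runtime_set_active(dev);
	/* 2. enable runtime PM before any clock is registered */
	pm_runtime_enable(dev);

	/* 1. pass the struct device so the core can track runtime PM */
	ret = clk_hw_register(dev, &foo_gate_hw);
	if (ret)
		pm_runtime_disable(dev);

	return ret;
}

With runtime PM enabled on the device, clk_register() (and clk_hw_register(),
which wraps it) stores the device pointer and brackets the relevant operations
with clk_pm_runtime_get()/clk_pm_runtime_put(), as in the patch below.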

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/clk/clk.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 114 insertions(+), 15 deletions(-)

Comments

Michael Turquette March 29, 2017, 8:22 p.m. UTC | #1
Hi Marek,

Quoting Marek Szyprowski (2017-03-22 04:35:40)
> Registers for some clocks might be located in the SOC area, which are under the
> power domain. To enable access to those registers respective domain has to be
> turned on. Additionally, registers for such clocks will usually loose its
> contents when power domain is turned off, so additional saving and restoring of
> them might be needed in the clock controller driver.
> 
> This patch adds basic infrastructure in the clocks core to allow implementing
> driver for such clocks under power domains. Clock provider can supply a
> struct device pointer, which is the used by clock core for tracking and managing
> clock's controller runtime pm state. Each clk_prepare() operation
> will first call pm_runtime_get_sync() on the supplied device, while
> clk_unprepare() will do pm_runtime_put_sync() at the end.
> 
> Additional calls to pm_runtime_get/put functions are required to ensure that any
> register access (like calculating/changing clock rates and unpreparing/disabling
> unused clocks on boot) will be done with clock controller in runtime resumend
> state.

The above is a bit confusing. Is clk_prepare really special? It seems to
me that every single clk_ops callback will need this?

That leads to my second question: why put this in the clk core? Can the
clk provider driver simply add pm_runtime_{get,put} calls into its
clk_ops callbacks instead? The clk core does not directly touch hardware
(e.g. reading registers) so putting the pm runtime calls into the
provider callbacks should be sufficient.
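
For illustration, that alternative would look roughly like this in a provider
driver; struct foo_gate and its fields are hypothetical and not part of this
patch:

#include <linux/clk-provider.h>
#include <linux/pm_runtime.h>

struct foo_gate {			/* hypothetical provider-private data */
	struct clk_hw hw;
	struct device *dev;		/* the clock controller device */
	void __iomem *reg;
};

#define to_foo_gate(_hw) container_of(_hw, struct foo_gate, hw)

static int foo_gate_prepare(struct clk_hw *hw)
{
	struct foo_gate *gate = to_foo_gate(hw);
	int ret;

	/* hold a runtime PM reference for as long as the clock is prepared */
	ret = pm_runtime_get_sync(gate->dev);
	return ret < 0 ? ret : 0;
}

static void foo_gate_unprepare(struct clk_hw *hw)
{
	struct foo_gate *gate = to_foo_gate(hw);

	pm_runtime_put_sync(gate->dev);
}

Every other sleepable callback that touches registers (.is_prepared,
.recalc_rate, .set_rate, .set_parent, ...) would need the same bracketing.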

> 
> When one wants to register clock controller, which make use of this feature, he
> has to:
> 1. Provide a struct device to the core when registering the provider.
> 2. Ensure to enable runtime PM for that device before registering clocks.
> 3. Make sure that the runtime PM status of the controller device reflects
>    the HW state.

Third question: is there a case where more than one device is required?
Is it possible that a single pm_runtime_get call against a single device
will not be enough for some clk provider drivers? If so then this
solution does not scale very well and the clk provider driver will have
to implement this in the clk_ops callbacks (as I mentioned above in my
second question).

Fourth & final question: I'm under the impression that pm runtime calls
can be nested and re-entered, but I want to make sure (Ulf?). For instance
it is highly likely that this new feature would cause something like:

pm_runtime_get()              - called by random driver
-> clk_prepare_enable()       - genpd enables functional clocks
   -> pm_runtime_get()        - called by clk_pm_runtime_get in clk core
      -> clk_prepare_enable() - genpd enables interface or bus clocks

I guess this is safe from the pm_runtime_get/genpd perspective, but want
to make sure first.

Thanks,
Mike

> 
> Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
> ---
>  drivers/clk/clk.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 114 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
> index 0fb39fe217d1..a6001b6e49a7 100644
> --- a/drivers/clk/clk.c
> +++ b/drivers/clk/clk.c
> @@ -21,6 +21,7 @@
>  #include <linux/of.h>
>  #include <linux/device.h>
>  #include <linux/init.h>
> +#include <linux/pm_runtime.h>
>  #include <linux/sched.h>
>  #include <linux/clkdev.h>
>  
> @@ -46,6 +47,7 @@ struct clk_core {
>         const struct clk_ops    *ops;
>         struct clk_hw           *hw;
>         struct module           *owner;
> +       struct device           *dev;
>         struct clk_core         *parent;
>         const char              **parent_names;
>         struct clk_core         **parents;
> @@ -87,6 +89,26 @@ struct clk {
>         struct hlist_node clks_node;
>  };
>  
> +/***           runtime pm          ***/
> +static int clk_pm_runtime_get(struct clk_core *core)
> +{
> +       int ret = 0;
> +
> +       if (!core->dev)
> +               return 0;
> +
> +       ret = pm_runtime_get_sync(core->dev);
> +       return ret < 0 ? ret : 0;
> +}
> +
> +static void clk_pm_runtime_put(struct clk_core *core)
> +{
> +       if (!core->dev)
> +               return;
> +
> +       pm_runtime_put_sync(core->dev);
> +}
> +
>  /***           locking             ***/
>  static void clk_prepare_lock(void)
>  {
> @@ -150,6 +172,8 @@ static void clk_enable_unlock(unsigned long flags)
>  
>  static bool clk_core_is_prepared(struct clk_core *core)
>  {
> +       bool ret = false;
> +
>         /*
>          * .is_prepared is optional for clocks that can prepare
>          * fall back to software usage counter if it is missing
> @@ -157,11 +181,18 @@ static bool clk_core_is_prepared(struct clk_core *core)
>         if (!core->ops->is_prepared)
>                 return core->prepare_count;
>  
> -       return core->ops->is_prepared(core->hw);
> +       if (!clk_pm_runtime_get(core)) {
> +               ret = core->ops->is_prepared(core->hw);
> +               clk_pm_runtime_put(core);
> +       }
> +
> +       return ret;
>  }
>  
>  static bool clk_core_is_enabled(struct clk_core *core)
>  {
> +       bool ret = false;
> +
>         /*
>          * .is_enabled is only mandatory for clocks that gate
>          * fall back to software usage counter if .is_enabled is missing
> @@ -169,7 +200,29 @@ static bool clk_core_is_enabled(struct clk_core *core)
>         if (!core->ops->is_enabled)
>                 return core->enable_count;
>  
> -       return core->ops->is_enabled(core->hw);
> +       /*
> +        * Check if clock controller's device is runtime active before
> +        * calling .is_enabled callback. If not, assume that clock is
> +        * disabled, because we might be called from atomic context, from
> +        * which pm_runtime_get() is not allowed.
> +        * This function is called mainly from clk_disable_unused_subtree,
> +        * which ensures proper runtime pm activation of controller before
> +        * taking enable spinlock, but the below check is needed if one tries
> +        * to call it from other places.
> +        */
> +       if (core->dev) {
> +               pm_runtime_get_noresume(core->dev);
> +               if (!pm_runtime_active(core->dev)) {
> +                       ret = false;
> +                       goto done;
> +               }
> +       }
> +
> +       ret = core->ops->is_enabled(core->hw);
> +done:
> +       clk_pm_runtime_put(core);
> +
> +       return ret;
>  }
>  
>  /***    helper functions   ***/
> @@ -489,6 +542,8 @@ static void clk_core_unprepare(struct clk_core *core)
>         if (core->ops->unprepare)
>                 core->ops->unprepare(core->hw);
>  
> +       clk_pm_runtime_put(core);
> +
>         trace_clk_unprepare_complete(core);
>         clk_core_unprepare(core->parent);
>  }
> @@ -530,10 +585,14 @@ static int clk_core_prepare(struct clk_core *core)
>                 return 0;
>  
>         if (core->prepare_count == 0) {
> -               ret = clk_core_prepare(core->parent);
> +               ret = clk_pm_runtime_get(core);
>                 if (ret)
>                         return ret;
>  
> +               ret = clk_core_prepare(core->parent);
> +               if (ret)
> +                       goto runtime_put;
> +
>                 trace_clk_prepare(core);
>  
>                 if (core->ops->prepare)
> @@ -541,15 +600,18 @@ static int clk_core_prepare(struct clk_core *core)
>  
>                 trace_clk_prepare_complete(core);
>  
> -               if (ret) {
> -                       clk_core_unprepare(core->parent);
> -                       return ret;
> -               }
> +               if (ret)
> +                       goto unprepare;
>         }
>  
>         core->prepare_count++;
>  
>         return 0;
> +unprepare:
> +       clk_core_unprepare(core->parent);
> +runtime_put:
> +       clk_pm_runtime_put(core);
> +       return ret;
>  }
>  
>  static int clk_core_prepare_lock(struct clk_core *core)
> @@ -745,6 +807,9 @@ static void clk_unprepare_unused_subtree(struct clk_core *core)
>         if (core->flags & CLK_IGNORE_UNUSED)
>                 return;
>  
> +       if (clk_pm_runtime_get(core))
> +               return;
> +
>         if (clk_core_is_prepared(core)) {
>                 trace_clk_unprepare(core);
>                 if (core->ops->unprepare_unused)
> @@ -753,6 +818,8 @@ static void clk_unprepare_unused_subtree(struct clk_core *core)
>                         core->ops->unprepare(core->hw);
>                 trace_clk_unprepare_complete(core);
>         }
> +
> +       clk_pm_runtime_put(core);
>  }
>  
>  static void clk_disable_unused_subtree(struct clk_core *core)
> @@ -768,6 +835,9 @@ static void clk_disable_unused_subtree(struct clk_core *core)
>         if (core->flags & CLK_OPS_PARENT_ENABLE)
>                 clk_core_prepare_enable(core->parent);
>  
> +       if (clk_pm_runtime_get(core))
> +               goto unprepare_out;
> +
>         flags = clk_enable_lock();
>  
>         if (core->enable_count)
> @@ -792,6 +862,8 @@ static void clk_disable_unused_subtree(struct clk_core *core)
>  
>  unlock_out:
>         clk_enable_unlock(flags);
> +       clk_pm_runtime_put(core);
> +unprepare_out:
>         if (core->flags & CLK_OPS_PARENT_ENABLE)
>                 clk_core_disable_unprepare(core->parent);
>  }
> @@ -1036,9 +1108,13 @@ long clk_get_accuracy(struct clk *clk)
>  static unsigned long clk_recalc(struct clk_core *core,
>                                 unsigned long parent_rate)
>  {
> -       if (core->ops->recalc_rate)
> -               return core->ops->recalc_rate(core->hw, parent_rate);
> -       return parent_rate;
> +       unsigned long rate = parent_rate;
> +
> +       if (core->ops->recalc_rate && !clk_pm_runtime_get(core)) {
> +               rate = core->ops->recalc_rate(core->hw, parent_rate);
> +               clk_pm_runtime_put(core);
> +       }
> +       return rate;
>  }
>  
>  /**
> @@ -1563,6 +1639,7 @@ static int clk_core_set_rate_nolock(struct clk_core *core,
>  {
>         struct clk_core *top, *fail_clk;
>         unsigned long rate = req_rate;
> +       int ret = 0;
>  
>         if (!core)
>                 return 0;
> @@ -1579,21 +1656,28 @@ static int clk_core_set_rate_nolock(struct clk_core *core,
>         if (!top)
>                 return -EINVAL;
>  
> +       ret = clk_pm_runtime_get(core);
> +       if (ret)
> +               return ret;
> +
>         /* notify that we are about to change rates */
>         fail_clk = clk_propagate_rate_change(top, PRE_RATE_CHANGE);
>         if (fail_clk) {
>                 pr_debug("%s: failed to set %s rate\n", __func__,
>                                 fail_clk->name);
>                 clk_propagate_rate_change(top, ABORT_RATE_CHANGE);
> -               return -EBUSY;
> +               ret = -EBUSY;
> +               goto err;
>         }
>  
>         /* change the rates */
>         clk_change_rate(top);
>  
>         core->req_rate = req_rate;
> +err:
> +       clk_pm_runtime_put(core);
>  
> -       return 0;
> +       return ret;
>  }
>  
>  /**
> @@ -1824,12 +1908,16 @@ static int clk_core_set_parent(struct clk_core *core, struct clk_core *parent)
>                 p_rate = parent->rate;
>         }
>  
> +       ret = clk_pm_runtime_get(core);
> +       if (ret)
> +               goto out;
> +
>         /* propagate PRE_RATE_CHANGE notifications */
>         ret = __clk_speculate_rates(core, p_rate);
>  
>         /* abort if a driver objects */
>         if (ret & NOTIFY_STOP_MASK)
> -               goto out;
> +               goto runtime_put;
>  
>         /* do the re-parent */
>         ret = __clk_set_parent(core, parent, p_index);
> @@ -1842,6 +1930,8 @@ static int clk_core_set_parent(struct clk_core *core, struct clk_core *parent)
>                 __clk_recalc_accuracies(core);
>         }
>  
> +runtime_put:
> +       clk_pm_runtime_put(core);
>  out:
>         clk_prepare_unlock();
>  
> @@ -2549,6 +2639,12 @@ struct clk *clk_register(struct device *dev, struct clk_hw *hw)
>                 goto fail_name;
>         }
>         core->ops = hw->init->ops;
> +       if (dev && pm_runtime_enabled(dev)) {
> +               core->dev = dev;
> +               ret = clk_pm_runtime_get(core);
> +               if (ret)
> +                       goto fail_pm;
> +       }
>         if (dev && dev->driver)
>                 core->owner = dev->driver->owner;
>         core->hw = hw;
> @@ -2595,12 +2691,13 @@ struct clk *clk_register(struct device *dev, struct clk_hw *hw)
>         }
>  
>         ret = __clk_core_init(core);
> -       if (!ret)
> +       if (!ret) {
> +               clk_pm_runtime_put(core);
>                 return hw->clk;
> +       }
>  
>         __clk_free_clk(hw->clk);
>         hw->clk = NULL;
> -
>  fail_parents:
>         kfree(core->parents);
>  fail_parent_names_copy:
> @@ -2608,6 +2705,8 @@ struct clk *clk_register(struct device *dev, struct clk_hw *hw)
>                 kfree_const(core->parent_names[i]);
>         kfree(core->parent_names);
>  fail_parent_names:
> +       clk_pm_runtime_put(core);
> +fail_pm:
>         kfree_const(core->name);
>  fail_name:
>         kfree(core);
> -- 
> 1.9.1
>
Marek Szyprowski March 30, 2017, 7:24 a.m. UTC | #2
Hi Michael,

On 2017-03-29 22:22, Michael Turquette wrote:
> Quoting Marek Szyprowski (2017-03-22 04:35:40)
>> Registers for some clocks might be located in the SOC area, which are under the
>> power domain. To enable access to those registers respective domain has to be
>> turned on. Additionally, registers for such clocks will usually loose its
>> contents when power domain is turned off, so additional saving and restoring of
>> them might be needed in the clock controller driver.
>>
>> This patch adds basic infrastructure in the clocks core to allow implementing
>> driver for such clocks under power domains. Clock provider can supply a
>> struct device pointer, which is the used by clock core for tracking and managing
>> clock's controller runtime pm state. Each clk_prepare() operation
>> will first call pm_runtime_get_sync() on the supplied device, while
>> clk_unprepare() will do pm_runtime_put_sync() at the end.
>>
>> Additional calls to pm_runtime_get/put functions are required to ensure that any
>> register access (like calculating/changing clock rates and unpreparing/disabling
>> unused clocks on boot) will be done with clock controller in runtime resumend
>> state.
> The above is a bit confusing. Is clk_prepare really special? It seems to
> me that every single clk_ops callback will need this?

clk_prepare/unprepare are special because they may sleep, so they are the
natural places for calling runtime PM operations. clk_enable()/clk_disable()
is called under a spinlock, so runtime PM cannot be called efficiently there,
but the core guarantees that they are called after clk_prepare(), so accessing
hw registers is safe. The remaining calls are not guaranteed to always come
after clk_prepare(), so the additional runtime PM calls and checks are needed
there.
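
A consumer-side sketch of that split (struct foo and foo_start() are
hypothetical): the sleepable half runtime-resumes the controller, so the
atomic half can run under a spinlock without any runtime PM call.

#include <linux/clk.h>
#include <linux/spinlock.h>

struct foo {				/* hypothetical consumer driver data */
	struct clk *clk;
	spinlock_t lock;
};

static int foo_start(struct foo *foo)
{
	unsigned long flags;
	int ret;

	ret = clk_prepare(foo->clk);	/* may sleep: core calls pm_runtime_get_sync() */
	if (ret)
		return ret;

	spin_lock_irqsave(&foo->lock, flags);	/* atomic section */
	ret = clk_enable(foo->clk);	/* safe: controller was resumed in clk_prepare() */
	spin_unlock_irqrestore(&foo->lock, flags);
	if (ret)
		clk_unprepare(foo->clk);	/* drop the runtime PM reference again */

	return ret;
}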

> That leads to my second question: why put this in the clk core? Can the
> clk provider driver simply add pm_runtime_{get,put} calls into its
> clk_ops callbacks instead? The clk core does not directly touch hardware
> (e.g. reading registers) so putting the pm runtime calls into the
> provider callbacks should be sufficient.

In theory it should be possible to duplicate all kinds of clock building
blocks (gates, muxes, dividers, ...) with additional runtime PM calls. This
would however result in much more code and somewhat more complicated locking.
Implementing it in the clk core made the code simpler. It also turned out that
runtime PM integration is needed for more than a single clock provider:
besides Samsung SoCs (Exynos 5433 and newer, Exynos 4412 ISP, the Exynos Audio
Subsystem; hacks in Exynos 542x can also be replaced by runtime PM calls), Ulf
mentioned that exactly the same pattern is used for some UX500 (STE) SoCs.
More will probably come once the feature is in, because for now such drivers
simply force the runtime-active state as a workaround or don't instantiate the
related power domains at all.

It is not that uncommon to have runtime PM integrated in the framework
(examples: mmc, scsi). Please note that this is optional for clock providers -
if they don't enable runtime PM for the provided clock controller device
during clock registration, the clock core will behave exactly the same way as
now.

>> When one wants to register clock controller, which make use of this feature, he
>> has to:
>> 1. Provide a struct device to the core when registering the provider.
>> 2. Ensure to enable runtime PM for that device before registering clocks.
>> 3. Make sure that the runtime PM status of the controller device reflects
>>     the HW state.
> Third question: is there a case where more than one device is required?
> Is is possible that a single pm_runtime_get call against a single device
> will not be enough for some clk provider drivers? If so then this
> solution does not scale very well and the clk provider driver will have
> to implement this in the clk_ops callbacks (as I mentioned above in my
> second question).

This is a generic question about runtime PM. There are various methods to
model hardware relations and control the PM/runtime PM state of a set of
devices: child-parent-bus relations (setting a child to the active state also
activates its parent), genpd power domains and sub-domains, and the recently
merged device PM links, which allow modelling relations outside the typical
child-parent tree hierarchy. IMHO the device core PM framework provides enough
features to solve the case when one needs to control more than one device -
and, worth mentioning, in all cases the client only needs to call the
pm_runtime functions on the ONE leaf device; everything else will be handled
by the PM core.
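
For instance, a sketch of tying a second supplier device to the clock
controller's (leaf) device with the device PM links mentioned above
(device_link_add() is the v4.10+ API; the function and device names are
hypothetical):

#include <linux/device.h>

/* Make @aux_dev a runtime PM supplier of @clk_ctrl_dev, so that a single
 * pm_runtime_get_sync(clk_ctrl_dev) also resumes aux_dev. */
static int foo_link_aux(struct device *clk_ctrl_dev, struct device *aux_dev)
{
	struct device_link *link;

	link = device_link_add(clk_ctrl_dev, aux_dev, DL_FLAG_PM_RUNTIME);
	if (!link)
		return -EINVAL;

	return 0;
}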

> Fourth & final question: I'm under the impression that pm runtime calls
> be be nested and re-enter, but I want to make sure (Ulf?). For instance
> it is highly likely that this new feature would cause something like:
>
> pm_runtime_get()              - called by random driver
> -> clk_prepare_enable()       - genpd enables functioal clocks
>     -> pm_runtime_get()        - called by clk_pm_runtime_get in clk core
>        -> clk_prepare_enable() - genpd enables interface or bus clocks
>
> I guess this is safe from the pm_runtime_get/genpd perspective, but want
> to make sure first.

Yes, this will work fine after the recent fixes. Tested with the Exynos IIS
ASoC driver (which is also a clock provider), which in turn is a client of the
Exynos Audio Subsystem clock provider.

> Thanks,

 >> [...]

Best regards
Ulf Hansson March 30, 2017, 4:29 p.m. UTC | #3
[...]

>
> Fourth & final question: I'm under the impression that pm runtime calls
> be be nested and re-enter, but I want to make sure (Ulf?). For instance
> it is highly likely that this new feature would cause something like:
>
> pm_runtime_get()              - called by random driver
> -> clk_prepare_enable()       - genpd enables functioal clocks
>    -> pm_runtime_get()        - called by clk_pm_runtime_get in clk core
>       -> clk_prepare_enable() - genpd enables interface or bus clocks
>
> I guess this is safe from the pm_runtime_get/genpd perspective, but want
> to make sure first.

I noticed Marek's reply, which perhaps is sufficient. However, to clarify
also from the genpd and runtime PM point of view: the re-entrant case is safe
and it is already quite commonly used.

For example, clk_prepare_enable() may be called from a random driver's
->runtime_resume() callback. When such a clock is managed behind an I2C
interface, the I2C controller also needs to be runtime resumed, meaning its
driver calls pm_runtime_get_sync() before it can serve the request and ungate
the clock.

[...]

Kind regards
Uffe
Michael Turquette March 30, 2017, 5:11 p.m. UTC | #4
Quoting Marek Szyprowski (2017-03-30 00:24:15)
> Hi Michael,
> 
> On 2017-03-29 22:22, Michael Turquette wrote:
> > Quoting Marek Szyprowski (2017-03-22 04:35:40)
> >> Registers for some clocks might be located in the SOC area, which are under the
> >> power domain. To enable access to those registers respective domain has to be
> >> turned on. Additionally, registers for such clocks will usually loose its
> >> contents when power domain is turned off, so additional saving and restoring of
> >> them might be needed in the clock controller driver.
> >>
> >> This patch adds basic infrastructure in the clocks core to allow implementing
> >> driver for such clocks under power domains. Clock provider can supply a
> >> struct device pointer, which is the used by clock core for tracking and managing
> >> clock's controller runtime pm state. Each clk_prepare() operation
> >> will first call pm_runtime_get_sync() on the supplied device, while
> >> clk_unprepare() will do pm_runtime_put_sync() at the end.
> >>
> >> Additional calls to pm_runtime_get/put functions are required to ensure that any
> >> register access (like calculating/changing clock rates and unpreparing/disabling
> >> unused clocks on boot) will be done with clock controller in runtime resumend
> >> state.
> > The above is a bit confusing. Is clk_prepare really special? It seems to
> > me that every single clk_ops callback will need this?
> 
> clk_prepare/unprepare are special, because they allow sleeping, so they 
> are natural
> candidates for the place for calling runtime PM operations. 
> clk_enable()/disable()
> is called under a spinlock, so runtime pm cannot be called efficiently 
> there, but
> core guarantees that they are called after clk_prepare(), so accessing 
> hw registers
> is safe. The only remaining calls are not guaranteed to be called always 
> after
> clk_prepare(), so those additional calls and checks in runtime pm are 
> needed there.

Right, so any call that holds prepare_lock and might touch the hardware
needs to first call pm_runtime_get_sync.

> 
> > That leads to my second question: why put this in the clk core? Can the
> > clk provider driver simply add pm_runtime_{get,put} calls into its
> > clk_ops callbacks instead? The clk core does not directly touch hardware
> > (e.g. reading registers) so putting the pm runtime calls into the
> > provider callbacks should be sufficient.
> 
> In theory is should be possible to duplicate all kind of clock build blocks
> (gates, muxes, dividers, ...) with additional runtime pm calls. This 
> would however
> end in much more code and a bit more complicated locking. Implementing 
> it in clk
> core made the code simpler. It also turned out that runtime pm 
> integration is
> needed for more that a single clock provider: besides Samsung SoCs 
> (Exynos 5433
> and newer, Exynos 4412 ISP, Exynos Audio Subsystem, hacks in Exynos 542x 
> can be
> also replaced by runtime PM calls), Ulf mentioned that exactly similar 
> pattern
> is used for some UX500 SoCs (STE). More will probably come once the 
> feature is
> in, because for now such drivers simply forces runtime active state as a
> workaround or don't instantiate related power domains at all.

I agree with the above, but I'm also wondering about the folks that use
regmap internally in their own clock provider drivers. I guess they will
have the option to either do their own thing or use this framework-level
solution.

I've started reviewing the code itself and will respond to those mails
separately.

Thanks,
Mike

> 
> It is not that uncommon to have runtime PM integrated in the framework 
> (examples:
> mmc, scsi). Please not that this is optional for clock providers - if 
> they don't
> enable runtime PM for the provided clock controller device during clock
> registration, the clock core will behave exactly the same way as now.
> 
> >> When one wants to register clock controller, which make use of this feature, he
> >> has to:
> >> 1. Provide a struct device to the core when registering the provider.
> >> 2. Ensure to enable runtime PM for that device before registering clocks.
> >> 3. Make sure that the runtime PM status of the controller device reflects
> >>     the HW state.
> > Third question: is there a case where more than one device is required?
> > Is is possible that a single pm_runtime_get call against a single device
> > will not be enough for some clk provider drivers? If so then this
> > solution does not scale very well and the clk provider driver will have
> > to implement this in the clk_ops callbacks (as I mentioned above in my
> > second question).
> 
> This is a generic question about runtime PM. There are various methods to
> model hardware relations to control pm/runtime pm state of a set of devices:
> child-parent-bus relations (setting child to active state also activates a
> parent), gen_pd power domains and sub-domains and recently merged device pm
> links, which allows to model relations across the typical child-parent tree
> hierarchy. IMHO device core pm related framework provides enough features to
> solve the case when one needs to control more than one device - what is
> worth to mention - in all cases the client only need to call pm_runtime
> funtions on the ONE leaf device, everything else will be handled by pm core.
> 
> > Fourth & final question: I'm under the impression that pm runtime calls
> > be be nested and re-enter, but I want to make sure (Ulf?). For instance
> > it is highly likely that this new feature would cause something like:
> >
> > pm_runtime_get()              - called by random driver
> > -> clk_prepare_enable()       - genpd enables functioal clocks
> >     -> pm_runtime_get()        - called by clk_pm_runtime_get in clk core
> >        -> clk_prepare_enable() - genpd enables interface or bus clocks
> >
> > I guess this is safe from the pm_runtime_get/genpd perspective, but want
> > to make sure first.
> 
> Yes, this will work fine after recent fixes. Tested with Exynos IIS ASoC
> driver (which is also a clock provider), which in turn is a client for
> Exynos Audio Subsystem clock provider.
> 
> > Thanks,
> 
>  >> [...]
> 
> Best regards
> -- 
> Marek Szyprowski, PhD
> Samsung R&D Institute Poland
>
Marek Szyprowski April 6, 2017, 12:46 p.m. UTC | #5
Hi Michael,

On 2017-03-30 19:11, Michael Turquette wrote:
> Quoting Marek Szyprowski (2017-03-30 00:24:15)
>> On 2017-03-29 22:22, Michael Turquette wrote:
>>> Quoting Marek Szyprowski (2017-03-22 04:35:40)
>>>> Registers for some clocks might be located in the SOC area, which are under the
>>>> power domain. To enable access to those registers respective domain has to be
>>>> turned on. Additionally, registers for such clocks will usually loose its
>>>> contents when power domain is turned off, so additional saving and restoring of
>>>> them might be needed in the clock controller driver.
>>>>
>>>> This patch adds basic infrastructure in the clocks core to allow implementing
>>>> driver for such clocks under power domains. Clock provider can supply a
>>>> struct device pointer, which is the used by clock core for tracking and managing
>>>> clock's controller runtime pm state. Each clk_prepare() operation
>>>> will first call pm_runtime_get_sync() on the supplied device, while
>>>> clk_unprepare() will do pm_runtime_put_sync() at the end.
>>>>
>>>> Additional calls to pm_runtime_get/put functions are required to ensure that any
>>>> register access (like calculating/changing clock rates and unpreparing/disabling
>>>> unused clocks on boot) will be done with clock controller in runtime resumend
>>>> state.
>>> The above is a bit confusing. Is clk_prepare really special? It seems to
>>> me that every single clk_ops callback will need this?
>> clk_prepare/unprepare are special, because they allow sleeping, so they
>> are natural
>> candidates for the place for calling runtime PM operations.
>> clk_enable()/disable()
>> is called under a spinlock, so runtime pm cannot be called efficiently
>> there, but
>> core guarantees that they are called after clk_prepare(), so accessing
>> hw registers
>> is safe. The only remaining calls are not guaranteed to be called always
>> after
>> clk_prepare(), so those additional calls and checks in runtime pm are
>> needed there.
> Right, so any call that holds prepare_lock and might touch the hardware
> needs to first call pm_runtime_get_sync.
>
>>> That leads to my second question: why put this in the clk core? Can the
>>> clk provider driver simply add pm_runtime_{get,put} calls into its
>>> clk_ops callbacks instead? The clk core does not directly touch hardware
>>> (e.g. reading registers) so putting the pm runtime calls into the
>>> provider callbacks should be sufficient.
>> In theory is should be possible to duplicate all kind of clock build blocks
>> (gates, muxes, dividers, ...) with additional runtime pm calls. This
>> would however
>> end in much more code and a bit more complicated locking. Implementing
>> it in clk
>> core made the code simpler. It also turned out that runtime pm
>> integration is
>> needed for more that a single clock provider: besides Samsung SoCs
>> (Exynos 5433
>> and newer, Exynos 4412 ISP, Exynos Audio Subsystem, hacks in Exynos 542x
>> can be
>> also replaced by runtime PM calls), Ulf mentioned that exactly similar
>> pattern
>> is used for some UX500 SoCs (STE). More will probably come once the
>> feature is
>> in, because for now such drivers simply forces runtime active state as a
>> workaround or don't instantiate related power domains at all.
> I agree with the above, but I'm also wondering about the folks that use
> regmap internally to their own clock provider drivers. I guess they will
> have the option to either do their own thing or use this framework-level
> solution.
>
> I've started reviewing the code itself and will respond to those mails
> separately.

Is there a chance to get your comments soon? I would really like to give
this patchset a try in linux-next for a few days, but for now Sylwester is
waiting for you.

Stephen: what is your opinion on this patchset? Would you like to give it
a try in -next?

 > ...

Best regards
Marek Szyprowski May 22, 2017, 12:34 p.m. UTC | #6
Hi All,

On 2017-04-06 14:46, Marek Szyprowski wrote:
> Hi Michael,
>
> On 2017-03-30 19:11, Michael Turquette wrote:
>> Quoting Marek Szyprowski (2017-03-30 00:24:15)
>>> On 2017-03-29 22:22, Michael Turquette wrote:
>>>> Quoting Marek Szyprowski (2017-03-22 04:35:40)
>>>>> Registers for some clocks might be located in the SOC area, which 
>>>>> are under the
>>>>> power domain. To enable access to those registers respective 
>>>>> domain has to be
>>>>> turned on. Additionally, registers for such clocks will usually 
>>>>> loose its
>>>>> contents when power domain is turned off, so additional saving and 
>>>>> restoring of
>>>>> them might be needed in the clock controller driver.
>>>>>
>>>>> This patch adds basic infrastructure in the clocks core to allow 
>>>>> implementing
>>>>> driver for such clocks under power domains. Clock provider can 
>>>>> supply a
>>>>> struct device pointer, which is the used by clock core for 
>>>>> tracking and managing
>>>>> clock's controller runtime pm state. Each clk_prepare() operation
>>>>> will first call pm_runtime_get_sync() on the supplied device, while
>>>>> clk_unprepare() will do pm_runtime_put_sync() at the end.
>>>>>
>>>>> Additional calls to pm_runtime_get/put functions are required to 
>>>>> ensure that any
>>>>> register access (like calculating/changing clock rates and 
>>>>> unpreparing/disabling
>>>>> unused clocks on boot) will be done with clock controller in 
>>>>> runtime resumend
>>>>> state.
>>>> The above is a bit confusing. Is clk_prepare really special? It 
>>>> seems to
>>>> me that every single clk_ops callback will need this?
>>> clk_prepare/unprepare are special, because they allow sleeping, so they
>>> are natural
>>> candidates for the place for calling runtime PM operations.
>>> clk_enable()/disable()
>>> is called under a spinlock, so runtime pm cannot be called efficiently
>>> there, but
>>> core guarantees that they are called after clk_prepare(), so accessing
>>> hw registers
>>> is safe. The only remaining calls are not guaranteed to be called 
>>> always
>>> after
>>> clk_prepare(), so those additional calls and checks in runtime pm are
>>> needed there.
>> Right, so any call that holds prepare_lock and might touch the hardware
>> needs to first call pm_runtime_get_sync.
>>
>>>> That leads to my second question: why put this in the clk core? Can 
>>>> the
>>>> clk provider driver simply add pm_runtime_{get,put} calls into its
>>>> clk_ops callbacks instead? The clk core does not directly touch 
>>>> hardware
>>>> (e.g. reading registers) so putting the pm runtime calls into the
>>>> provider callbacks should be sufficient.
>>> In theory is should be possible to duplicate all kind of clock build 
>>> blocks
>>> (gates, muxes, dividers, ...) with additional runtime pm calls. This
>>> would however
>>> end in much more code and a bit more complicated locking. Implementing
>>> it in clk
>>> core made the code simpler. It also turned out that runtime pm
>>> integration is
>>> needed for more that a single clock provider: besides Samsung SoCs
>>> (Exynos 5433
>>> and newer, Exynos 4412 ISP, Exynos Audio Subsystem, hacks in Exynos 
>>> 542x
>>> can be
>>> also replaced by runtime PM calls), Ulf mentioned that exactly similar
>>> pattern
>>> is used for some UX500 SoCs (STE). More will probably come once the
>>> feature is
>>> in, because for now such drivers simply forces runtime active state 
>>> as a
>>> workaround or don't instantiate related power domains at all.
>> I agree with the above, but I'm also wondering about the folks that use
>> regmap internally to their own clock provider drivers. I guess they will
>> have the option to either do their own thing or use this framework-level
>> solution.
>>
>> I've started reviewing the code itself and will respond to those mails
>> separately.
>
> Is there a chance to get your comments soon? I would really like to give
> this patchset a try in linux-next for a few days, but for now Sylwester
> waits for you.
>
> Stephen: what is your opinion on this patchset? Would you like to give it
> a try in -next?
>

Mike, Stephen: any comments? We have already missed the v4.12 merge window...

Best regards

Patch

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 0fb39fe217d1..a6001b6e49a7 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -21,6 +21,7 @@ 
 #include <linux/of.h>
 #include <linux/device.h>
 #include <linux/init.h>
+#include <linux/pm_runtime.h>
 #include <linux/sched.h>
 #include <linux/clkdev.h>
 
@@ -46,6 +47,7 @@  struct clk_core {
 	const struct clk_ops	*ops;
 	struct clk_hw		*hw;
 	struct module		*owner;
+	struct device		*dev;
 	struct clk_core		*parent;
 	const char		**parent_names;
 	struct clk_core		**parents;
@@ -87,6 +89,26 @@  struct clk {
 	struct hlist_node clks_node;
 };
 
+/***           runtime pm          ***/
+static int clk_pm_runtime_get(struct clk_core *core)
+{
+	int ret = 0;
+
+	if (!core->dev)
+		return 0;
+
+	ret = pm_runtime_get_sync(core->dev);
+	return ret < 0 ? ret : 0;
+}
+
+static void clk_pm_runtime_put(struct clk_core *core)
+{
+	if (!core->dev)
+		return;
+
+	pm_runtime_put_sync(core->dev);
+}
+
 /***           locking             ***/
 static void clk_prepare_lock(void)
 {
@@ -150,6 +172,8 @@  static void clk_enable_unlock(unsigned long flags)
 
 static bool clk_core_is_prepared(struct clk_core *core)
 {
+	bool ret = false;
+
 	/*
 	 * .is_prepared is optional for clocks that can prepare
 	 * fall back to software usage counter if it is missing
@@ -157,11 +181,18 @@  static bool clk_core_is_prepared(struct clk_core *core)
 	if (!core->ops->is_prepared)
 		return core->prepare_count;
 
-	return core->ops->is_prepared(core->hw);
+	if (!clk_pm_runtime_get(core)) {
+		ret = core->ops->is_prepared(core->hw);
+		clk_pm_runtime_put(core);
+	}
+
+	return ret;
 }
 
 static bool clk_core_is_enabled(struct clk_core *core)
 {
+	bool ret = false;
+
 	/*
 	 * .is_enabled is only mandatory for clocks that gate
 	 * fall back to software usage counter if .is_enabled is missing
@@ -169,7 +200,29 @@  static bool clk_core_is_enabled(struct clk_core *core)
 	if (!core->ops->is_enabled)
 		return core->enable_count;
 
-	return core->ops->is_enabled(core->hw);
+	/*
+	 * Check if clock controller's device is runtime active before
+	 * calling .is_enabled callback. If not, assume that clock is
+	 * disabled, because we might be called from atomic context, from
+	 * which pm_runtime_get() is not allowed.
+	 * This function is called mainly from clk_disable_unused_subtree,
+	 * which ensures proper runtime pm activation of controller before
+	 * taking enable spinlock, but the below check is needed if one tries
+	 * to call it from other places.
+	 */
+	if (core->dev) {
+		pm_runtime_get_noresume(core->dev);
+		if (!pm_runtime_active(core->dev)) {
+			ret = false;
+			goto done;
+		}
+	}
+
+	ret = core->ops->is_enabled(core->hw);
+done:
+	clk_pm_runtime_put(core);
+
+	return ret;
 }
 
 /***    helper functions   ***/
@@ -489,6 +542,8 @@  static void clk_core_unprepare(struct clk_core *core)
 	if (core->ops->unprepare)
 		core->ops->unprepare(core->hw);
 
+	clk_pm_runtime_put(core);
+
 	trace_clk_unprepare_complete(core);
 	clk_core_unprepare(core->parent);
 }
@@ -530,10 +585,14 @@  static int clk_core_prepare(struct clk_core *core)
 		return 0;
 
 	if (core->prepare_count == 0) {
-		ret = clk_core_prepare(core->parent);
+		ret = clk_pm_runtime_get(core);
 		if (ret)
 			return ret;
 
+		ret = clk_core_prepare(core->parent);
+		if (ret)
+			goto runtime_put;
+
 		trace_clk_prepare(core);
 
 		if (core->ops->prepare)
@@ -541,15 +600,18 @@  static int clk_core_prepare(struct clk_core *core)
 
 		trace_clk_prepare_complete(core);
 
-		if (ret) {
-			clk_core_unprepare(core->parent);
-			return ret;
-		}
+		if (ret)
+			goto unprepare;
 	}
 
 	core->prepare_count++;
 
 	return 0;
+unprepare:
+	clk_core_unprepare(core->parent);
+runtime_put:
+	clk_pm_runtime_put(core);
+	return ret;
 }
 
 static int clk_core_prepare_lock(struct clk_core *core)
@@ -745,6 +807,9 @@  static void clk_unprepare_unused_subtree(struct clk_core *core)
 	if (core->flags & CLK_IGNORE_UNUSED)
 		return;
 
+	if (clk_pm_runtime_get(core))
+		return;
+
 	if (clk_core_is_prepared(core)) {
 		trace_clk_unprepare(core);
 		if (core->ops->unprepare_unused)
@@ -753,6 +818,8 @@  static void clk_unprepare_unused_subtree(struct clk_core *core)
 			core->ops->unprepare(core->hw);
 		trace_clk_unprepare_complete(core);
 	}
+
+	clk_pm_runtime_put(core);
 }
 
 static void clk_disable_unused_subtree(struct clk_core *core)
@@ -768,6 +835,9 @@  static void clk_disable_unused_subtree(struct clk_core *core)
 	if (core->flags & CLK_OPS_PARENT_ENABLE)
 		clk_core_prepare_enable(core->parent);
 
+	if (clk_pm_runtime_get(core))
+		goto unprepare_out;
+
 	flags = clk_enable_lock();
 
 	if (core->enable_count)
@@ -792,6 +862,8 @@  static void clk_disable_unused_subtree(struct clk_core *core)
 
 unlock_out:
 	clk_enable_unlock(flags);
+	clk_pm_runtime_put(core);
+unprepare_out:
 	if (core->flags & CLK_OPS_PARENT_ENABLE)
 		clk_core_disable_unprepare(core->parent);
 }
@@ -1036,9 +1108,13 @@  long clk_get_accuracy(struct clk *clk)
 static unsigned long clk_recalc(struct clk_core *core,
 				unsigned long parent_rate)
 {
-	if (core->ops->recalc_rate)
-		return core->ops->recalc_rate(core->hw, parent_rate);
-	return parent_rate;
+	unsigned long rate = parent_rate;
+
+	if (core->ops->recalc_rate && !clk_pm_runtime_get(core)) {
+		rate = core->ops->recalc_rate(core->hw, parent_rate);
+		clk_pm_runtime_put(core);
+	}
+	return rate;
 }
 
 /**
@@ -1563,6 +1639,7 @@  static int clk_core_set_rate_nolock(struct clk_core *core,
 {
 	struct clk_core *top, *fail_clk;
 	unsigned long rate = req_rate;
+	int ret = 0;
 
 	if (!core)
 		return 0;
@@ -1579,21 +1656,28 @@  static int clk_core_set_rate_nolock(struct clk_core *core,
 	if (!top)
 		return -EINVAL;
 
+	ret = clk_pm_runtime_get(core);
+	if (ret)
+		return ret;
+
 	/* notify that we are about to change rates */
 	fail_clk = clk_propagate_rate_change(top, PRE_RATE_CHANGE);
 	if (fail_clk) {
 		pr_debug("%s: failed to set %s rate\n", __func__,
 				fail_clk->name);
 		clk_propagate_rate_change(top, ABORT_RATE_CHANGE);
-		return -EBUSY;
+		ret = -EBUSY;
+		goto err;
 	}
 
 	/* change the rates */
 	clk_change_rate(top);
 
 	core->req_rate = req_rate;
+err:
+	clk_pm_runtime_put(core);
 
-	return 0;
+	return ret;
 }
 
 /**
@@ -1824,12 +1908,16 @@  static int clk_core_set_parent(struct clk_core *core, struct clk_core *parent)
 		p_rate = parent->rate;
 	}
 
+	ret = clk_pm_runtime_get(core);
+	if (ret)
+		goto out;
+
 	/* propagate PRE_RATE_CHANGE notifications */
 	ret = __clk_speculate_rates(core, p_rate);
 
 	/* abort if a driver objects */
 	if (ret & NOTIFY_STOP_MASK)
-		goto out;
+		goto runtime_put;
 
 	/* do the re-parent */
 	ret = __clk_set_parent(core, parent, p_index);
@@ -1842,6 +1930,8 @@  static int clk_core_set_parent(struct clk_core *core, struct clk_core *parent)
 		__clk_recalc_accuracies(core);
 	}
 
+runtime_put:
+	clk_pm_runtime_put(core);
 out:
 	clk_prepare_unlock();
 
@@ -2549,6 +2639,12 @@  struct clk *clk_register(struct device *dev, struct clk_hw *hw)
 		goto fail_name;
 	}
 	core->ops = hw->init->ops;
+	if (dev && pm_runtime_enabled(dev)) {
+		core->dev = dev;
+		ret = clk_pm_runtime_get(core);
+		if (ret)
+			goto fail_pm;
+	}
 	if (dev && dev->driver)
 		core->owner = dev->driver->owner;
 	core->hw = hw;
@@ -2595,12 +2691,13 @@  struct clk *clk_register(struct device *dev, struct clk_hw *hw)
 	}
 
 	ret = __clk_core_init(core);
-	if (!ret)
+	if (!ret) {
+		clk_pm_runtime_put(core);
 		return hw->clk;
+	}
 
 	__clk_free_clk(hw->clk);
 	hw->clk = NULL;
-
 fail_parents:
 	kfree(core->parents);
 fail_parent_names_copy:
@@ -2608,6 +2705,8 @@  struct clk *clk_register(struct device *dev, struct clk_hw *hw)
 		kfree_const(core->parent_names[i]);
 	kfree(core->parent_names);
 fail_parent_names:
+	clk_pm_runtime_put(core);
+fail_pm:
 	kfree_const(core->name);
 fail_name:
 	kfree(core);