diff mbox

dmaengine: pl330: get rid of pm_runtime_irq_safe()

Message ID 1428649045-26444-1-git-send-email-r.baldyga@samsung.com (mailing list archive)
State Superseded
Headers show

Commit Message

Robert Baldyga April 10, 2015, 6:57 a.m. UTC
As using pm_runtime_irq_safe() causes power domain is always enabled,
we want to get rid of it to reach better power efficiency. For this purpose
we call pm_runtime_get()/pm_runtime_put() only in DMA channel allocate/free
code. DMA channels are always requested and freed in non-atomic context,
so we don't need pm_runtime_irq_safe().

With pm_runtime_irq_safe() enabled the only action performed by
pm_runtime_get()/pm_runtime_put() functions was enabling and disabling
AHB clock, so now we do that manually to avoid using pm_runtime functions
in atomic context.

In result we manage AHB clock as we did before, plus we can disable power
domain when nobody uses DMA controller.

Signed-off-by: Robert Baldyga <r.baldyga@samsung.com>
---
 drivers/dma/pl330.c | 57 ++++++++++++++++++++++++++++++++---------------------
 1 file changed, 34 insertions(+), 23 deletions(-)

Comments

Krzysztof Kozlowski April 10, 2015, 7:53 a.m. UTC | #1
2015-04-10 8:57 GMT+02:00 Robert Baldyga <r.baldyga@samsung.com>:
> As using pm_runtime_irq_safe() causes power domain is always enabled,
> we want to get rid of it to reach better power efficiency. For this purpose
> we call pm_runtime_get()/pm_runtime_put() only in DMA channel allocate/free
> code. DMA channels are always requested and freed in non-atomic context,
> so we don't need pm_runtime_irq_safe().
>
> With pm_runtime_irq_safe() enabled the only action performed by
> pm_runtime_get()/pm_runtime_put() functions was enabling and disabling
> AHB clock, so now we do that manually to avoid using pm_runtime functions
> in atomic context.
>
> In result we manage AHB clock as we did before, plus we can disable power
> domain when nobody uses DMA controller.
>
> Signed-off-by: Robert Baldyga <r.baldyga@samsung.com>

Great job! Some comments below.

> ---
>  drivers/dma/pl330.c | 57 ++++++++++++++++++++++++++++++++---------------------
>  1 file changed, 34 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
> index 0e1f567..5348ab9 100644
> --- a/drivers/dma/pl330.c
> +++ b/drivers/dma/pl330.c
> @@ -1969,6 +1969,7 @@ static inline void fill_queue(struct dma_pl330_chan *pch)
>  static void pl330_tasklet(unsigned long data)
>  {
>         struct dma_pl330_chan *pch = (struct dma_pl330_chan *)data;
> +       struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
>         struct dma_pl330_desc *desc, *_dt;
>         unsigned long flags;
>         bool power_down = false;
> @@ -2032,11 +2033,9 @@ static void pl330_tasklet(unsigned long data)
>         }
>         spin_unlock_irqrestore(&pch->lock, flags);
>
> -       /* If work list empty, power down */
> -       if (power_down) {
> -               pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
> -               pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
> -       }
> +       /* If work list empty, disable clock */
> +       if (power_down)
> +               amba_pclk_disable(pcdev);
>  }
>
>  bool pl330_filter(struct dma_chan *chan, void *param)
> @@ -2077,6 +2076,7 @@ static int pl330_alloc_chan_resources(struct dma_chan *chan)
>         struct pl330_dmac *pl330 = pch->dmac;
>         unsigned long flags;
>
> +       pm_runtime_get_sync(pch->dmac->ddma.dev);
>         spin_lock_irqsave(&pch->lock, flags);
>
>         dma_cookie_init(chan);
> @@ -2165,10 +2165,15 @@ static int pl330_terminate_all(struct dma_chan *chan)
>  int pl330_pause(struct dma_chan *chan)
>  {
>         struct dma_pl330_chan *pch = to_pchan(chan);
> +       struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
>         struct pl330_dmac *pl330 = pch->dmac;
>         unsigned long flags;
> +       int ret;
> +
> +       ret = amba_pclk_enable(pcdev);
> +       if (ret < 0)
> +               return ret;
>
> -       pm_runtime_get_sync(pl330->ddma.dev);
>         spin_lock_irqsave(&pch->lock, flags);
>
>         spin_lock(&pl330->lock);
> @@ -2176,8 +2181,7 @@ int pl330_pause(struct dma_chan *chan)
>         spin_unlock(&pl330->lock);
>
>         spin_unlock_irqrestore(&pch->lock, flags);
> -       pm_runtime_mark_last_busy(pl330->ddma.dev);
> -       pm_runtime_put_autosuspend(pl330->ddma.dev);
> +       amba_pclk_disable(pcdev);
>
>         return 0;
>  }
> @@ -2185,11 +2189,16 @@ int pl330_pause(struct dma_chan *chan)
>  static void pl330_free_chan_resources(struct dma_chan *chan)
>  {
>         struct dma_pl330_chan *pch = to_pchan(chan);
> +       struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
>         unsigned long flags;
> +       int ret;
>
>         tasklet_kill(&pch->task);
>
> -       pm_runtime_get_sync(pch->dmac->ddma.dev);
> +       ret = amba_pclk_enable(pcdev);
> +       if (ret < 0)
> +               return;
> +
>         spin_lock_irqsave(&pch->lock, flags);
>
>         pl330_release_channel(pch->thread);
> @@ -2199,6 +2208,7 @@ static void pl330_free_chan_resources(struct dma_chan *chan)
>                 list_splice_tail_init(&pch->work_list, &pch->dmac->desc_pool);
>
>         spin_unlock_irqrestore(&pch->lock, flags);
> +       amba_pclk_disable(pcdev);
>         pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
>         pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
>  }
> @@ -2206,12 +2216,17 @@ static void pl330_free_chan_resources(struct dma_chan *chan)
>  int pl330_get_current_xferred_count(struct dma_pl330_chan *pch,
>                 struct dma_pl330_desc *desc)
>  {
> +       struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
>         struct pl330_thread *thrd = pch->thread;
>         struct pl330_dmac *pl330 = pch->dmac;
>         void __iomem *regs = thrd->dmac->base;
>         u32 val, addr;
> +       int ret;
> +
> +       ret = amba_pclk_enable(pcdev);
> +       if (ret < 0)
> +               return ret;
>
> -       pm_runtime_get_sync(pl330->ddma.dev);
>         val = addr = 0;
>         if (desc->rqcfg.src_inc) {
>                 val = readl(regs + SA(thrd->id));
> @@ -2220,8 +2235,8 @@ int pl330_get_current_xferred_count(struct dma_pl330_chan *pch,
>                 val = readl(regs + DA(thrd->id));
>                 addr = desc->px.dst_addr;
>         }
> -       pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
> -       pm_runtime_put_autosuspend(pl330->ddma.dev);
> +
> +       amba_pclk_disable(pcdev);
>         return val - addr;
>  }
>
> @@ -2276,17 +2291,21 @@ out:
>  static void pl330_issue_pending(struct dma_chan *chan)
>  {
>         struct dma_pl330_chan *pch = to_pchan(chan);
> +       struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
>         unsigned long flags;
> +       int ret;
>
>         spin_lock_irqsave(&pch->lock, flags);
>         if (list_empty(&pch->work_list)) {
>                 /*
>                  * Warn on nothing pending. Empty submitted_list may
> -                * break our pm_runtime usage counter as it is
> -                * updated on work_list emptiness status.
> +                * break our clock usage counter as it is updated on
> +                * work_list emptiness status.
>                  */
>                 WARN_ON(list_empty(&pch->submitted_list));
> -               pm_runtime_get_sync(pch->dmac->ddma.dev);
> +               ret = amba_pclk_enable(pcdev);
> +               if (ret < 0)
> +                       return;
>         }
>         list_splice_tail_init(&pch->submitted_list, &pch->work_list);
>         spin_unlock_irqrestore(&pch->lock, flags);
> @@ -2723,10 +2742,6 @@ static int __maybe_unused pl330_suspend(struct device *dev)
>
>         pm_runtime_disable(dev);
>
> -       if (!pm_runtime_status_suspended(dev)) {
> -               /* amba did not disable the clock */
> -               amba_pclk_disable(pcdev);
> -       }

What if system suspend happens when clock is enabled? I think it may
happen between issue_pending and tasklet execution. In such (rare)
case the clock won't be disabled but will be unprepared.

>         amba_pclk_unprepare(pcdev);
>
>         return 0;
> @@ -2741,9 +2756,6 @@ static int __maybe_unused pl330_resume(struct device *dev)
>         if (ret)
>                 return ret;
>
> -       if (!pm_runtime_status_suspended(dev))
> -               ret = amba_pclk_enable(pcdev);
> -
>         pm_runtime_enable(dev);
>
>         return ret;
> @@ -2910,7 +2922,6 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
>                 pcfg->data_buf_dep, pcfg->data_bus_width / 8, pcfg->num_chan,
>                 pcfg->num_peri, pcfg->num_events);
>
> -       pm_runtime_irq_safe(&adev->dev);
>         pm_runtime_use_autosuspend(&adev->dev);
>         pm_runtime_set_autosuspend_delay(&adev->dev, PL330_AUTOSUSPEND_DELAY);

Any needs for updating also PL330_AUTOSUSPEND_DELAY? I set it to 20ms
to avoid too frequent suspend/resume. In your solution suspend happens
in different cases so maybe the time should longer?

Best regards,
Krzysztof
--
To unsubscribe from this list: send the line "unsubscribe dmaengine" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lars-Peter Clausen April 10, 2015, 8:04 a.m. UTC | #2
On 04/10/2015 08:57 AM, Robert Baldyga wrote:
> As using pm_runtime_irq_safe() causes power domain is always enabled,
> we want to get rid of it to reach better power efficiency. For this purpose
> we call pm_runtime_get()/pm_runtime_put() only in DMA channel allocate/free
> code. DMA channels are always requested and freed in non-atomic context,
> so we don't need pm_runtime_irq_safe().

I wonder how useful this is considering that pretty much always a channel is 
requested. I think we need an extension to the dmaengine API that allows a 
channel consumer to notify the driver that the channel that it requested is 
currently not in use. E.g. something like dmaengine_pm_{get,put}(struct 
dma_chan *). These functions would have the restriction that they can only 
be called from a non-atomic context, whereas issue_pending() and friends can 
still be called from a atomic context. So dmaengine_pm_get() would kind of 
be a notification that consumer intends to do something  in the near future 
whereas dmaengine_pm_put() would be a notification that it is not going to 
use the channel in the near future.

E.g. for audio DMA the audio driver could call dmaengine_pm_get() when the 
PCM device is opened and dmaengine_pm_put() when it is closed. Whereas 
issue_pending is called when the audio is started.

- Lars

--
To unsubscribe from this list: send the line "unsubscribe dmaengine" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Robert Baldyga April 10, 2015, 9:57 a.m. UTC | #3
On 04/10/2015 10:04 AM, Lars-Peter Clausen wrote:
> On 04/10/2015 08:57 AM, Robert Baldyga wrote:
>> As using pm_runtime_irq_safe() causes power domain is always enabled,
>> we want to get rid of it to reach better power efficiency. For this purpose
>> we call pm_runtime_get()/pm_runtime_put() only in DMA channel allocate/free
>> code. DMA channels are always requested and freed in non-atomic context,
>> so we don't need pm_runtime_irq_safe().
> 
> I wonder how useful this is considering that pretty much always a channel is 
> requested. I think we need an extension to the dmaengine API that allows a 
> channel consumer to notify the driver that the channel that it requested is 
> currently not in use. E.g. something like dmaengine_pm_{get,put}(struct 
> dma_chan *). These functions would have the restriction that they can only 
> be called from a non-atomic context, whereas issue_pending() and friends can 
> still be called from a atomic context. So dmaengine_pm_get() would kind of 
> be a notification that consumer intends to do something  in the near future 
> whereas dmaengine_pm_put() would be a notification that it is not going to 
> use the channel in the near future.
> 
> E.g. for audio DMA the audio driver could call dmaengine_pm_get() when the 
> PCM device is opened and dmaengine_pm_put() when it is closed. Whereas 
> issue_pending is called when the audio is started.
> 

I see. I'm considering how to do it. It would need to make changes in
all clients, or at least doing dmaengine_pm_get() by default while
requesting channel.

However separating clock enable/disable (which can be done in atomic
context) from runtime PM (which we prefer to use in non-atomic context)
still seems to be good idea. While dmaengine_pm_{get,put} could be
called when client is going to use DMA channel in near future, clock
could be enabled on demand and disabled immediately after each
operation. It can provide some gain, especially in cases when time
interval between dmaengine_pm_get() and dmaengine_pm_put() is much
longer than period when we actually are using DMA hardware.

Thanks,
Robert Baldyga
--
To unsubscribe from this list: send the line "unsubscribe dmaengine" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lars-Peter Clausen April 10, 2015, 10:47 a.m. UTC | #4
On 04/10/2015 11:57 AM, Robert Baldyga wrote:
> On 04/10/2015 10:04 AM, Lars-Peter Clausen wrote:
>> On 04/10/2015 08:57 AM, Robert Baldyga wrote:
>>> As using pm_runtime_irq_safe() causes power domain is always enabled,
>>> we want to get rid of it to reach better power efficiency. For this purpose
>>> we call pm_runtime_get()/pm_runtime_put() only in DMA channel allocate/free
>>> code. DMA channels are always requested and freed in non-atomic context,
>>> so we don't need pm_runtime_irq_safe().
>>
>> I wonder how useful this is considering that pretty much always a channel is
>> requested. I think we need an extension to the dmaengine API that allows a
>> channel consumer to notify the driver that the channel that it requested is
>> currently not in use. E.g. something like dmaengine_pm_{get,put}(struct
>> dma_chan *). These functions would have the restriction that they can only
>> be called from a non-atomic context, whereas issue_pending() and friends can
>> still be called from a atomic context. So dmaengine_pm_get() would kind of
>> be a notification that consumer intends to do something  in the near future
>> whereas dmaengine_pm_put() would be a notification that it is not going to
>> use the channel in the near future.
>>
>> E.g. for audio DMA the audio driver could call dmaengine_pm_get() when the
>> PCM device is opened and dmaengine_pm_put() when it is closed. Whereas
>> issue_pending is called when the audio is started.
>>
>
> I see. I'm considering how to do it. It would need to make changes in
> all clients, or at least doing dmaengine_pm_get() by default while
> requesting channel.
>

To maintain backwards compatibility when a channel is requested it should 
implicitly also get a reference. If the driver doesn't need the channel 
immediately it can call dmaengine_pm_put() after requesting the channel. 
This allows adding support for this to the dmaengine clients one by one 
rather than having to do it for all at once.

> However separating clock enable/disable (which can be done in atomic
> context) from runtime PM (which we prefer to use in non-atomic context)
> still seems to be good idea. While dmaengine_pm_{get,put} could be
> called when client is going to use DMA channel in near future, clock
> could be enabled on demand and disabled immediately after each
> operation. It can provide some gain, especially in cases when time
> interval between dmaengine_pm_get() and dmaengine_pm_put() is much
> longer than period when we actually are using DMA hardware.

Yes, dmaengine_pm_{get,put} would be a incremental improvement on top of 
this patch. I'm just trying to point out that doing pm_runtime_get_sync() in 
alloc_chan_resources() alone is not that effective from a runtime 
power-management point of view since you pretty much always have a channel 
requested. So you always end up with a reference and no power-saving will 
happen.

- Lars

--
To unsubscribe from this list: send the line "unsubscribe dmaengine" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 0e1f567..5348ab9 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -1969,6 +1969,7 @@  static inline void fill_queue(struct dma_pl330_chan *pch)
 static void pl330_tasklet(unsigned long data)
 {
 	struct dma_pl330_chan *pch = (struct dma_pl330_chan *)data;
+	struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
 	struct dma_pl330_desc *desc, *_dt;
 	unsigned long flags;
 	bool power_down = false;
@@ -2032,11 +2033,9 @@  static void pl330_tasklet(unsigned long data)
 	}
 	spin_unlock_irqrestore(&pch->lock, flags);
 
-	/* If work list empty, power down */
-	if (power_down) {
-		pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
-		pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
-	}
+	/* If work list empty, disable clock */
+	if (power_down)
+		amba_pclk_disable(pcdev);
 }
 
 bool pl330_filter(struct dma_chan *chan, void *param)
@@ -2077,6 +2076,7 @@  static int pl330_alloc_chan_resources(struct dma_chan *chan)
 	struct pl330_dmac *pl330 = pch->dmac;
 	unsigned long flags;
 
+	pm_runtime_get_sync(pch->dmac->ddma.dev);
 	spin_lock_irqsave(&pch->lock, flags);
 
 	dma_cookie_init(chan);
@@ -2165,10 +2165,15 @@  static int pl330_terminate_all(struct dma_chan *chan)
 int pl330_pause(struct dma_chan *chan)
 {
 	struct dma_pl330_chan *pch = to_pchan(chan);
+	struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
 	struct pl330_dmac *pl330 = pch->dmac;
 	unsigned long flags;
+	int ret;
+
+	ret = amba_pclk_enable(pcdev);
+	if (ret < 0)
+		return ret;
 
-	pm_runtime_get_sync(pl330->ddma.dev);
 	spin_lock_irqsave(&pch->lock, flags);
 
 	spin_lock(&pl330->lock);
@@ -2176,8 +2181,7 @@  int pl330_pause(struct dma_chan *chan)
 	spin_unlock(&pl330->lock);
 
 	spin_unlock_irqrestore(&pch->lock, flags);
-	pm_runtime_mark_last_busy(pl330->ddma.dev);
-	pm_runtime_put_autosuspend(pl330->ddma.dev);
+	amba_pclk_disable(pcdev);
 
 	return 0;
 }
@@ -2185,11 +2189,16 @@  int pl330_pause(struct dma_chan *chan)
 static void pl330_free_chan_resources(struct dma_chan *chan)
 {
 	struct dma_pl330_chan *pch = to_pchan(chan);
+	struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
 	unsigned long flags;
+	int ret;
 
 	tasklet_kill(&pch->task);
 
-	pm_runtime_get_sync(pch->dmac->ddma.dev);
+	ret = amba_pclk_enable(pcdev);
+	if (ret < 0)
+		return;
+
 	spin_lock_irqsave(&pch->lock, flags);
 
 	pl330_release_channel(pch->thread);
@@ -2199,6 +2208,7 @@  static void pl330_free_chan_resources(struct dma_chan *chan)
 		list_splice_tail_init(&pch->work_list, &pch->dmac->desc_pool);
 
 	spin_unlock_irqrestore(&pch->lock, flags);
+	amba_pclk_disable(pcdev);
 	pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
 	pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
 }
@@ -2206,12 +2216,17 @@  static void pl330_free_chan_resources(struct dma_chan *chan)
 int pl330_get_current_xferred_count(struct dma_pl330_chan *pch,
 		struct dma_pl330_desc *desc)
 {
+	struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
 	struct pl330_thread *thrd = pch->thread;
 	struct pl330_dmac *pl330 = pch->dmac;
 	void __iomem *regs = thrd->dmac->base;
 	u32 val, addr;
+	int ret;
+
+	ret = amba_pclk_enable(pcdev);
+	if (ret < 0)
+		return ret;
 
-	pm_runtime_get_sync(pl330->ddma.dev);
 	val = addr = 0;
 	if (desc->rqcfg.src_inc) {
 		val = readl(regs + SA(thrd->id));
@@ -2220,8 +2235,8 @@  int pl330_get_current_xferred_count(struct dma_pl330_chan *pch,
 		val = readl(regs + DA(thrd->id));
 		addr = desc->px.dst_addr;
 	}
-	pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
-	pm_runtime_put_autosuspend(pl330->ddma.dev);
+
+	amba_pclk_disable(pcdev);
 	return val - addr;
 }
 
@@ -2276,17 +2291,21 @@  out:
 static void pl330_issue_pending(struct dma_chan *chan)
 {
 	struct dma_pl330_chan *pch = to_pchan(chan);
+	struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
 	unsigned long flags;
+	int ret;
 
 	spin_lock_irqsave(&pch->lock, flags);
 	if (list_empty(&pch->work_list)) {
 		/*
 		 * Warn on nothing pending. Empty submitted_list may
-		 * break our pm_runtime usage counter as it is
-		 * updated on work_list emptiness status.
+		 * break our clock usage counter as it is updated on
+		 * work_list emptiness status.
 		 */
 		WARN_ON(list_empty(&pch->submitted_list));
-		pm_runtime_get_sync(pch->dmac->ddma.dev);
+		ret = amba_pclk_enable(pcdev);
+		if (ret < 0)
+			return;
 	}
 	list_splice_tail_init(&pch->submitted_list, &pch->work_list);
 	spin_unlock_irqrestore(&pch->lock, flags);
@@ -2723,10 +2742,6 @@  static int __maybe_unused pl330_suspend(struct device *dev)
 
 	pm_runtime_disable(dev);
 
-	if (!pm_runtime_status_suspended(dev)) {
-		/* amba did not disable the clock */
-		amba_pclk_disable(pcdev);
-	}
 	amba_pclk_unprepare(pcdev);
 
 	return 0;
@@ -2741,9 +2756,6 @@  static int __maybe_unused pl330_resume(struct device *dev)
 	if (ret)
 		return ret;
 
-	if (!pm_runtime_status_suspended(dev))
-		ret = amba_pclk_enable(pcdev);
-
 	pm_runtime_enable(dev);
 
 	return ret;
@@ -2910,7 +2922,6 @@  pl330_probe(struct amba_device *adev, const struct amba_id *id)
 		pcfg->data_buf_dep, pcfg->data_bus_width / 8, pcfg->num_chan,
 		pcfg->num_peri, pcfg->num_events);
 
-	pm_runtime_irq_safe(&adev->dev);
 	pm_runtime_use_autosuspend(&adev->dev);
 	pm_runtime_set_autosuspend_delay(&adev->dev, PL330_AUTOSUSPEND_DELAY);
 	pm_runtime_mark_last_busy(&adev->dev);