Message ID | 1428649045-26444-1-git-send-email-r.baldyga@samsung.com (mailing list archive)
---|---
State | Superseded
2015-04-10 8:57 GMT+02:00 Robert Baldyga <r.baldyga@samsung.com>:
> As using pm_runtime_irq_safe() causes the power domain to be always
> enabled, we want to get rid of it to reach better power efficiency. For
> this purpose we call pm_runtime_get()/pm_runtime_put() only in the DMA
> channel allocate/free code. DMA channels are always requested and freed
> in non-atomic context, so we don't need pm_runtime_irq_safe().
>
> With pm_runtime_irq_safe() enabled, the only action performed by the
> pm_runtime_get()/pm_runtime_put() functions was enabling and disabling
> the AHB clock, so now we do that manually to avoid using pm_runtime
> functions in atomic context.
>
> As a result we manage the AHB clock as we did before, plus we can
> disable the power domain when nobody uses the DMA controller.
>
> Signed-off-by: Robert Baldyga <r.baldyga@samsung.com>

Great job! Some comments below.

> ---
>  drivers/dma/pl330.c | 57 ++++++++++++++++++++++++++++++++---------------------
>  1 file changed, 34 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
> index 0e1f567..5348ab9 100644
> --- a/drivers/dma/pl330.c
> +++ b/drivers/dma/pl330.c
> @@ -1969,6 +1969,7 @@ static inline void fill_queue(struct dma_pl330_chan *pch)
>  static void pl330_tasklet(unsigned long data)
>  {
>  	struct dma_pl330_chan *pch = (struct dma_pl330_chan *)data;
> +	struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
>  	struct dma_pl330_desc *desc, *_dt;
>  	unsigned long flags;
>  	bool power_down = false;
> @@ -2032,11 +2033,9 @@ static void pl330_tasklet(unsigned long data)
>  	}
>  	spin_unlock_irqrestore(&pch->lock, flags);
>
> -	/* If work list empty, power down */
> -	if (power_down) {
> -		pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
> -		pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
> -	}
> +	/* If work list empty, disable clock */
> +	if (power_down)
> +		amba_pclk_disable(pcdev);
>  }
>
>  bool pl330_filter(struct dma_chan *chan, void *param)
> @@ -2077,6 +2076,7 @@ static int pl330_alloc_chan_resources(struct dma_chan *chan)
>  	struct pl330_dmac *pl330 = pch->dmac;
>  	unsigned long flags;
>
> +	pm_runtime_get_sync(pch->dmac->ddma.dev);
>  	spin_lock_irqsave(&pch->lock, flags);
>
>  	dma_cookie_init(chan);
> @@ -2165,10 +2165,15 @@ static int pl330_terminate_all(struct dma_chan *chan)
>  int pl330_pause(struct dma_chan *chan)
>  {
>  	struct dma_pl330_chan *pch = to_pchan(chan);
> +	struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
>  	struct pl330_dmac *pl330 = pch->dmac;
>  	unsigned long flags;
> +	int ret;
> +
> +	ret = amba_pclk_enable(pcdev);
> +	if (ret < 0)
> +		return ret;
>
> -	pm_runtime_get_sync(pl330->ddma.dev);
>  	spin_lock_irqsave(&pch->lock, flags);
>
>  	spin_lock(&pl330->lock);
> @@ -2176,8 +2181,7 @@ int pl330_pause(struct dma_chan *chan)
>  	spin_unlock(&pl330->lock);
>
>  	spin_unlock_irqrestore(&pch->lock, flags);
> -	pm_runtime_mark_last_busy(pl330->ddma.dev);
> -	pm_runtime_put_autosuspend(pl330->ddma.dev);
> +	amba_pclk_disable(pcdev);
>
>  	return 0;
>  }
> @@ -2185,11 +2189,16 @@ int pl330_pause(struct dma_chan *chan)
>  static void pl330_free_chan_resources(struct dma_chan *chan)
>  {
>  	struct dma_pl330_chan *pch = to_pchan(chan);
> +	struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
>  	unsigned long flags;
> +	int ret;
>
>  	tasklet_kill(&pch->task);
>
> -	pm_runtime_get_sync(pch->dmac->ddma.dev);
> +	ret = amba_pclk_enable(pcdev);
> +	if (ret < 0)
> +		return;
> +
>  	spin_lock_irqsave(&pch->lock, flags);
>
>  	pl330_release_channel(pch->thread);
> @@ -2199,6 +2208,7 @@ static void pl330_free_chan_resources(struct dma_chan *chan)
>  		list_splice_tail_init(&pch->work_list, &pch->dmac->desc_pool);
>
>  	spin_unlock_irqrestore(&pch->lock, flags);
> +	amba_pclk_disable(pcdev);
>  	pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
>  	pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
>  }
> @@ -2206,12 +2216,17 @@ static void pl330_free_chan_resources(struct dma_chan *chan)
>  int pl330_get_current_xferred_count(struct dma_pl330_chan *pch,
>  		struct dma_pl330_desc *desc)
>  {
> +	struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
>  	struct pl330_thread *thrd = pch->thread;
>  	struct pl330_dmac *pl330 = pch->dmac;
>  	void __iomem *regs = thrd->dmac->base;
>  	u32 val, addr;
> +	int ret;
> +
> +	ret = amba_pclk_enable(pcdev);
> +	if (ret < 0)
> +		return ret;
>
> -	pm_runtime_get_sync(pl330->ddma.dev);
>  	val = addr = 0;
>  	if (desc->rqcfg.src_inc) {
>  		val = readl(regs + SA(thrd->id));
> @@ -2220,8 +2235,8 @@ int pl330_get_current_xferred_count(struct dma_pl330_chan *pch,
>  		val = readl(regs + DA(thrd->id));
>  		addr = desc->px.dst_addr;
>  	}
> -	pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
> -	pm_runtime_put_autosuspend(pl330->ddma.dev);
> +
> +	amba_pclk_disable(pcdev);
>  	return val - addr;
>  }
>
> @@ -2276,17 +2291,21 @@ out:
>  static void pl330_issue_pending(struct dma_chan *chan)
>  {
>  	struct dma_pl330_chan *pch = to_pchan(chan);
> +	struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
>  	unsigned long flags;
> +	int ret;
>
>  	spin_lock_irqsave(&pch->lock, flags);
>  	if (list_empty(&pch->work_list)) {
>  		/*
>  		 * Warn on nothing pending. Empty submitted_list may
> -		 * break our pm_runtime usage counter as it is
> -		 * updated on work_list emptiness status.
> +		 * break our clock usage counter as it is updated on
> +		 * work_list emptiness status.
>  		 */
>  		WARN_ON(list_empty(&pch->submitted_list));
> -		pm_runtime_get_sync(pch->dmac->ddma.dev);
> +		ret = amba_pclk_enable(pcdev);
> +		if (ret < 0)
> +			return;
>  	}
>  	list_splice_tail_init(&pch->submitted_list, &pch->work_list);
>  	spin_unlock_irqrestore(&pch->lock, flags);
> @@ -2723,10 +2742,6 @@ static int __maybe_unused pl330_suspend(struct device *dev)
>
>  	pm_runtime_disable(dev);
>
> -	if (!pm_runtime_status_suspended(dev)) {
> -		/* amba did not disable the clock */
> -		amba_pclk_disable(pcdev);
> -	}

What if system suspend happens while the clock is enabled? I think it may
happen between issue_pending and tasklet execution. In such a (rare) case
the clock won't be disabled but will be unprepared.

>  	amba_pclk_unprepare(pcdev);
>
>  	return 0;
> @@ -2741,9 +2756,6 @@ static int __maybe_unused pl330_resume(struct device *dev)
>  	if (ret)
>  		return ret;
>
> -	if (!pm_runtime_status_suspended(dev))
> -		ret = amba_pclk_enable(pcdev);
> -
>  	pm_runtime_enable(dev);
>
>  	return ret;
> @@ -2910,7 +2922,6 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
>  		pcfg->data_buf_dep, pcfg->data_bus_width / 8, pcfg->num_chan,
>  		pcfg->num_peri, pcfg->num_events);
>
> -	pm_runtime_irq_safe(&adev->dev);
>  	pm_runtime_use_autosuspend(&adev->dev);
>  	pm_runtime_set_autosuspend_delay(&adev->dev, PL330_AUTOSUSPEND_DELAY);

Any need to also update PL330_AUTOSUSPEND_DELAY? I set it to 20 ms to avoid
too frequent suspend/resume. In your solution suspend happens in different
cases, so maybe the time should be longer?

Best regards,
Krzysztof
On 04/10/2015 08:57 AM, Robert Baldyga wrote:
> As using pm_runtime_irq_safe() causes the power domain to be always
> enabled, we want to get rid of it to reach better power efficiency. For
> this purpose we call pm_runtime_get()/pm_runtime_put() only in the DMA
> channel allocate/free code. DMA channels are always requested and freed
> in non-atomic context, so we don't need pm_runtime_irq_safe().

I wonder how useful this is, considering that a channel is pretty much
always requested. I think we need an extension to the dmaengine API that
allows a channel consumer to notify the driver that the channel it
requested is currently not in use. E.g. something like
dmaengine_pm_{get,put}(struct dma_chan *). These functions would have the
restriction that they can only be called from a non-atomic context,
whereas issue_pending() and friends can still be called from an atomic
context. So dmaengine_pm_get() would kind of be a notification that the
consumer intends to do something in the near future, whereas
dmaengine_pm_put() would be a notification that it is not going to use the
channel in the near future.

E.g. for audio DMA the audio driver could call dmaengine_pm_get() when the
PCM device is opened and dmaengine_pm_put() when it is closed, whereas
issue_pending() is called when the audio is started.

- Lars
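[Editor's note: to make the shape of this proposal concrete, here is a minimal
consumer-side sketch. dmaengine_pm_get()/dmaengine_pm_put() do not exist in
the dmaengine API; they stand for the extension suggested above, and struct
example_dev with its open/close callbacks is purely illustrative.]

/*
 * Hypothetical sketch only: dmaengine_pm_get()/dmaengine_pm_put() are the
 * proposed, not yet existing, dmaengine calls. Everything named example_*
 * is made up for illustration.
 */
#include <linux/dmaengine.h>

struct example_dev {
	struct dma_chan *chan;	/* obtained earlier, e.g. via dma_request_slave_channel() */
};

/* Non-atomic context, e.g. when the PCM device is opened. */
static int example_open(struct example_dev *edev)
{
	/* Hint that transfers will start soon; the DMA driver could take
	 * its runtime-PM reference here instead of at channel request time. */
	return dmaengine_pm_get(edev->chan);
}

/* Non-atomic context, e.g. when the PCM device is closed. */
static void example_close(struct example_dev *edev)
{
	/* The channel stays requested, but the controller may now
	 * runtime-suspend until the next open. */
	dmaengine_pm_put(edev->chan);
}

Between these two calls, issue_pending() and friends would keep their
current atomic-context semantics.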
On 04/10/2015 10:04 AM, Lars-Peter Clausen wrote:
> On 04/10/2015 08:57 AM, Robert Baldyga wrote:
>> As using pm_runtime_irq_safe() causes the power domain to be always
>> enabled, we want to get rid of it to reach better power efficiency. For
>> this purpose we call pm_runtime_get()/pm_runtime_put() only in the DMA
>> channel allocate/free code. DMA channels are always requested and freed
>> in non-atomic context, so we don't need pm_runtime_irq_safe().
>
> I wonder how useful this is, considering that a channel is pretty much
> always requested. I think we need an extension to the dmaengine API that
> allows a channel consumer to notify the driver that the channel it
> requested is currently not in use. E.g. something like
> dmaengine_pm_{get,put}(struct dma_chan *). These functions would have
> the restriction that they can only be called from a non-atomic context,
> whereas issue_pending() and friends can still be called from an atomic
> context. So dmaengine_pm_get() would kind of be a notification that the
> consumer intends to do something in the near future, whereas
> dmaengine_pm_put() would be a notification that it is not going to use
> the channel in the near future.
>
> E.g. for audio DMA the audio driver could call dmaengine_pm_get() when
> the PCM device is opened and dmaengine_pm_put() when it is closed,
> whereas issue_pending() is called when the audio is started.

I see. I'm considering how to do it. It would require changes in all
clients, or at least doing dmaengine_pm_get() by default while requesting
a channel.

However, separating clock enable/disable (which can be done in atomic
context) from runtime PM (which we prefer to use in non-atomic context)
still seems to be a good idea. While dmaengine_pm_{get,put} could be
called when a client is going to use a DMA channel in the near future, the
clock could be enabled on demand and disabled immediately after each
operation. It can provide some gain, especially in cases when the time
interval between dmaengine_pm_get() and dmaengine_pm_put() is much longer
than the period when we are actually using the DMA hardware.

Thanks,
Robert Baldyga
On 04/10/2015 11:57 AM, Robert Baldyga wrote:
> On 04/10/2015 10:04 AM, Lars-Peter Clausen wrote:
>> On 04/10/2015 08:57 AM, Robert Baldyga wrote:
>>> As using pm_runtime_irq_safe() causes the power domain to be always
>>> enabled, we want to get rid of it to reach better power efficiency.
>>> For this purpose we call pm_runtime_get()/pm_runtime_put() only in the
>>> DMA channel allocate/free code. DMA channels are always requested and
>>> freed in non-atomic context, so we don't need pm_runtime_irq_safe().
>>
>> I wonder how useful this is, considering that a channel is pretty much
>> always requested. I think we need an extension to the dmaengine API
>> that allows a channel consumer to notify the driver that the channel it
>> requested is currently not in use. E.g. something like
>> dmaengine_pm_{get,put}(struct dma_chan *). These functions would have
>> the restriction that they can only be called from a non-atomic context,
>> whereas issue_pending() and friends can still be called from an atomic
>> context. So dmaengine_pm_get() would kind of be a notification that the
>> consumer intends to do something in the near future, whereas
>> dmaengine_pm_put() would be a notification that it is not going to use
>> the channel in the near future.
>>
>> E.g. for audio DMA the audio driver could call dmaengine_pm_get() when
>> the PCM device is opened and dmaengine_pm_put() when it is closed,
>> whereas issue_pending() is called when the audio is started.
>
> I see. I'm considering how to do it. It would require changes in all
> clients, or at least doing dmaengine_pm_get() by default while
> requesting a channel.

To maintain backwards compatibility, when a channel is requested it should
implicitly also get a reference. If the driver doesn't need the channel
immediately, it can call dmaengine_pm_put() after requesting the channel.
This allows adding support for this to the dmaengine clients one by one
rather than having to do it for all of them at once.

> However, separating clock enable/disable (which can be done in atomic
> context) from runtime PM (which we prefer to use in non-atomic context)
> still seems to be a good idea. While dmaengine_pm_{get,put} could be
> called when a client is going to use a DMA channel in the near future,
> the clock could be enabled on demand and disabled immediately after each
> operation. It can provide some gain, especially in cases when the time
> interval between dmaengine_pm_get() and dmaengine_pm_put() is much
> longer than the period when we are actually using the DMA hardware.

Yes, dmaengine_pm_{get,put} would be an incremental improvement on top of
this patch. I'm just trying to point out that doing pm_runtime_get_sync()
in alloc_chan_resources() alone is not that effective from a runtime
power-management point of view, since you pretty much always have a
channel requested. So you always end up with a reference and no power
saving will happen.

- Lars
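[Editor's note: a hypothetical sketch of the backwards-compatible scheme
described above. Requesting a channel would implicitly take a PM reference;
a converted client simply drops it right after the request if it does not
need the hardware yet. dmaengine_pm_put() is the proposed, not yet existing,
call; the function name and the "tx" channel name are illustrative.]

/*
 * Unconverted clients behave exactly as before: the implicit reference is
 * held for the channel's lifetime. A converted client releases it until it
 * actually intends to start transfers.
 */
#include <linux/dmaengine.h>

static struct dma_chan *example_request_idle_chan(struct device *dev)
{
	struct dma_chan *chan;

	chan = dma_request_slave_channel(dev, "tx");
	if (!chan)
		return NULL;

	/* Not going to transfer yet, so drop the implicit PM reference and
	 * let the DMA controller runtime-suspend in the meantime. */
	dmaengine_pm_put(chan);

	return chan;
}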
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 0e1f567..5348ab9 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -1969,6 +1969,7 @@ static inline void fill_queue(struct dma_pl330_chan *pch)
 static void pl330_tasklet(unsigned long data)
 {
 	struct dma_pl330_chan *pch = (struct dma_pl330_chan *)data;
+	struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
 	struct dma_pl330_desc *desc, *_dt;
 	unsigned long flags;
 	bool power_down = false;
@@ -2032,11 +2033,9 @@ static void pl330_tasklet(unsigned long data)
 	}
 	spin_unlock_irqrestore(&pch->lock, flags);
 
-	/* If work list empty, power down */
-	if (power_down) {
-		pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
-		pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
-	}
+	/* If work list empty, disable clock */
+	if (power_down)
+		amba_pclk_disable(pcdev);
 }
 
 bool pl330_filter(struct dma_chan *chan, void *param)
@@ -2077,6 +2076,7 @@ static int pl330_alloc_chan_resources(struct dma_chan *chan)
 	struct pl330_dmac *pl330 = pch->dmac;
 	unsigned long flags;
 
+	pm_runtime_get_sync(pch->dmac->ddma.dev);
 	spin_lock_irqsave(&pch->lock, flags);
 
 	dma_cookie_init(chan);
@@ -2165,10 +2165,15 @@ static int pl330_terminate_all(struct dma_chan *chan)
 int pl330_pause(struct dma_chan *chan)
 {
 	struct dma_pl330_chan *pch = to_pchan(chan);
+	struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
 	struct pl330_dmac *pl330 = pch->dmac;
 	unsigned long flags;
+	int ret;
+
+	ret = amba_pclk_enable(pcdev);
+	if (ret < 0)
+		return ret;
 
-	pm_runtime_get_sync(pl330->ddma.dev);
 	spin_lock_irqsave(&pch->lock, flags);
 
 	spin_lock(&pl330->lock);
@@ -2176,8 +2181,7 @@ int pl330_pause(struct dma_chan *chan)
 	spin_unlock(&pl330->lock);
 
 	spin_unlock_irqrestore(&pch->lock, flags);
-	pm_runtime_mark_last_busy(pl330->ddma.dev);
-	pm_runtime_put_autosuspend(pl330->ddma.dev);
+	amba_pclk_disable(pcdev);
 
 	return 0;
 }
@@ -2185,11 +2189,16 @@ int pl330_pause(struct dma_chan *chan)
 static void pl330_free_chan_resources(struct dma_chan *chan)
 {
 	struct dma_pl330_chan *pch = to_pchan(chan);
+	struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
 	unsigned long flags;
+	int ret;
 
 	tasklet_kill(&pch->task);
 
-	pm_runtime_get_sync(pch->dmac->ddma.dev);
+	ret = amba_pclk_enable(pcdev);
+	if (ret < 0)
+		return;
+
 	spin_lock_irqsave(&pch->lock, flags);
 
 	pl330_release_channel(pch->thread);
@@ -2199,6 +2208,7 @@ static void pl330_free_chan_resources(struct dma_chan *chan)
 		list_splice_tail_init(&pch->work_list, &pch->dmac->desc_pool);
 
 	spin_unlock_irqrestore(&pch->lock, flags);
+	amba_pclk_disable(pcdev);
 	pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
 	pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
 }
@@ -2206,12 +2216,17 @@ static void pl330_free_chan_resources(struct dma_chan *chan)
 int pl330_get_current_xferred_count(struct dma_pl330_chan *pch,
 		struct dma_pl330_desc *desc)
 {
+	struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
 	struct pl330_thread *thrd = pch->thread;
 	struct pl330_dmac *pl330 = pch->dmac;
 	void __iomem *regs = thrd->dmac->base;
 	u32 val, addr;
+	int ret;
+
+	ret = amba_pclk_enable(pcdev);
+	if (ret < 0)
+		return ret;
 
-	pm_runtime_get_sync(pl330->ddma.dev);
 	val = addr = 0;
 	if (desc->rqcfg.src_inc) {
 		val = readl(regs + SA(thrd->id));
@@ -2220,8 +2235,8 @@ int pl330_get_current_xferred_count(struct dma_pl330_chan *pch,
 		val = readl(regs + DA(thrd->id));
 		addr = desc->px.dst_addr;
 	}
-	pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
-	pm_runtime_put_autosuspend(pl330->ddma.dev);
+
+	amba_pclk_disable(pcdev);
 	return val - addr;
 }
 
@@ -2276,17 +2291,21 @@ out:
 static void pl330_issue_pending(struct dma_chan *chan)
 {
 	struct dma_pl330_chan *pch = to_pchan(chan);
+	struct amba_device *pcdev = to_amba_device(pch->dmac->ddma.dev);
 	unsigned long flags;
+	int ret;
 
 	spin_lock_irqsave(&pch->lock, flags);
 	if (list_empty(&pch->work_list)) {
 		/*
 		 * Warn on nothing pending. Empty submitted_list may
-		 * break our pm_runtime usage counter as it is
-		 * updated on work_list emptiness status.
+		 * break our clock usage counter as it is updated on
+		 * work_list emptiness status.
 		 */
 		WARN_ON(list_empty(&pch->submitted_list));
-		pm_runtime_get_sync(pch->dmac->ddma.dev);
+		ret = amba_pclk_enable(pcdev);
+		if (ret < 0)
+			return;
 	}
 	list_splice_tail_init(&pch->submitted_list, &pch->work_list);
 	spin_unlock_irqrestore(&pch->lock, flags);
@@ -2723,10 +2742,6 @@ static int __maybe_unused pl330_suspend(struct device *dev)
 
 	pm_runtime_disable(dev);
 
-	if (!pm_runtime_status_suspended(dev)) {
-		/* amba did not disable the clock */
-		amba_pclk_disable(pcdev);
-	}
 	amba_pclk_unprepare(pcdev);
 
 	return 0;
@@ -2741,9 +2756,6 @@ static int __maybe_unused pl330_resume(struct device *dev)
 	if (ret)
 		return ret;
 
-	if (!pm_runtime_status_suspended(dev))
-		ret = amba_pclk_enable(pcdev);
-
 	pm_runtime_enable(dev);
 
 	return ret;
@@ -2910,7 +2922,6 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 		pcfg->data_buf_dep, pcfg->data_bus_width / 8, pcfg->num_chan,
 		pcfg->num_peri, pcfg->num_events);
 
-	pm_runtime_irq_safe(&adev->dev);
 	pm_runtime_use_autosuspend(&adev->dev);
 	pm_runtime_set_autosuspend_delay(&adev->dev, PL330_AUTOSUSPEND_DELAY);
 	pm_runtime_mark_last_busy(&adev->dev);
As using pm_runtime_irq_safe() causes the power domain to be always
enabled, we want to get rid of it to reach better power efficiency. For
this purpose we call pm_runtime_get()/pm_runtime_put() only in the DMA
channel allocate/free code. DMA channels are always requested and freed in
non-atomic context, so we don't need pm_runtime_irq_safe().

With pm_runtime_irq_safe() enabled, the only action performed by the
pm_runtime_get()/pm_runtime_put() functions was enabling and disabling the
AHB clock, so now we do that manually to avoid using pm_runtime functions
in atomic context.

As a result we manage the AHB clock as we did before, plus we can disable
the power domain when nobody uses the DMA controller.

Signed-off-by: Robert Baldyga <r.baldyga@samsung.com>
---
 drivers/dma/pl330.c | 57 ++++++++++++++++++++++++++++++++---------------------
 1 file changed, 34 insertions(+), 23 deletions(-)
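[Editor's note: a condensed illustration of the power-management split the
commit message describes, for readers skimming the thread. The example_*
functions are not from the driver (see the diff above for the real changes);
only the pm_runtime_* and amba_pclk_* calls are the actual kernel APIs the
patch uses.]

#include <linux/amba/bus.h>
#include <linux/pm_runtime.h>

/* Channel allocation/freeing runs in non-atomic context, so full runtime
 * PM (which may power the whole domain up and down) is used only here. */
static void example_alloc_chan(struct device *dmac_dev)
{
	pm_runtime_get_sync(dmac_dev);		/* may power up the domain */
}

static void example_free_chan(struct device *dmac_dev)
{
	pm_runtime_mark_last_busy(dmac_dev);
	pm_runtime_put_autosuspend(dmac_dev);	/* domain may go down later */
}

/* Hot paths may run in atomic context, so only the AHB clock is toggled;
 * the reference taken at allocation keeps the power domain on meanwhile. */
static int example_touch_registers(struct amba_device *pcdev)
{
	int ret;

	ret = amba_pclk_enable(pcdev);		/* clk_enable(), atomic-safe */
	if (ret < 0)
		return ret;

	/* ... access the DMAC registers ... */

	amba_pclk_disable(pcdev);
	return 0;
}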