Message ID | 1416995095-13763-1-git-send-email-padma.v@samsung.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Vinod/Lars, On 11/26/14, Padmavathi Venna <padma.v@samsung.com> wrote: > Fill txstate.residue with the amount of bytes remaining in the current > transfer if the transfer is not complete. This will be of particular > use to i2s DMA transfers, providing more accurate hw_ptr values to ASoC. > > I had taken the code from Dylan Reid <dgreid@chromium.org> patch from the > below link and modified according to the current dmaengine framework. > http://comments.gmane.org/gmane.linux.kernel.samsung-soc/23007 > > Cc: Dylan Reid <dgreid@chromium.org> > Signed-off-by: Padmavathi Venna <padma.v@samsung.com> > --- > > This patch has been tested for audio playback on exynos5420 peach-pit. > > drivers/dma/pl330.c | 67 > +++++++++++++++++++++++++++++++++++++++++++++++++- > 1 files changed, 65 insertions(+), 2 deletions(-) > > diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c > index b7493d2..db880ae 100644 > --- a/drivers/dma/pl330.c > +++ b/drivers/dma/pl330.c > @@ -2182,11 +2182,74 @@ static void pl330_free_chan_resources(struct > dma_chan *chan) > pm_runtime_put_autosuspend(pch->dmac->ddma.dev); > } > > +static inline int > +pl330_src_addr_in_desc(struct dma_pl330_desc *desc, unsigned int sar) > +{ > + return ((desc->px.src_addr <= sar) && > + (sar <= (desc->px.src_addr + desc->px.bytes))); > +} > + > +static inline int > +pl330_dst_addr_in_desc(struct dma_pl330_desc *desc, unsigned int dar) > +{ > + return ((desc->px.dst_addr <= dar) && > + (dar <= (desc->px.dst_addr + desc->px.bytes))); > +} > + > static enum dma_status > pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie, > struct dma_tx_state *txstate) > { > - return dma_cookie_status(chan, cookie, txstate); > + dma_addr_t sar, dar; > + struct dma_pl330_chan *pch = to_pchan(chan); > + void __iomem *regs = pch->dmac->base; > + struct pl330_thread *thrd = pch->thread; > + struct dma_pl330_desc *desc; > + unsigned int residue = 0; > + unsigned long flags; > + bool first = true; > + dma_cookie_t first_c, current_c; > + dma_cookie_t used; > + enum dma_status ret; > + > + ret = dma_cookie_status(chan, cookie, txstate); > + if (ret == DMA_COMPLETE || !txstate) > + return ret; > + > + used = txstate->used; > + > + spin_lock_irqsave(&pch->lock, flags); > + sar = readl(regs + SA(thrd->id)); > + dar = readl(regs + DA(thrd->id)); > + > + list_for_each_entry(desc, &pch->work_list, node) { > + if (desc->status == BUSY) { > + current_c = desc->txd.cookie; > + if (first) { > + first_c = desc->txd.cookie; > + first = false; > + } > + > + if (first_c < current_c) > + residue += desc->px.bytes; > + else { > + if (desc->rqcfg.src_inc && pl330_src_addr_in_desc(desc, sar)) { > + residue += desc->px.bytes; > + residue -= sar - desc->px.src_addr; > + } else if (desc->rqcfg.dst_inc && pl330_dst_addr_in_desc(desc, dar)) { > + residue += desc->px.bytes; > + residue -= dar - desc->px.dst_addr; > + } > + } > + } else if (desc->status == PREP) > + residue += desc->px.bytes; > + > + if (desc->txd.cookie == used) > + break; > + } > + spin_unlock_irqrestore(&pch->lock, flags); > + dma_set_residue(txstate, residue); > + return ret; > } > > static void pl330_issue_pending(struct dma_chan *chan) > @@ -2631,7 +2694,7 @@ static int pl330_dma_device_slave_caps(struct dma_chan > *dchan, > caps->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); > caps->cmd_pause = false; > caps->cmd_terminate = true; > - caps->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; > + caps->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; > > return 0; > } Any comment on this patch? Thanks Padma > -- > 1.7.4.4 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" > in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html >
On 12/02/2014 06:38 AM, Padma Venkat wrote: > Hi Vinod/Lars, > > On 11/26/14, Padmavathi Venna <padma.v@samsung.com> wrote: >> Fill txstate.residue with the amount of bytes remaining in the current >> transfer if the transfer is not complete. This will be of particular >> use to i2s DMA transfers, providing more accurate hw_ptr values to ASoC. >> >> I had taken the code from Dylan Reid <dgreid@chromium.org> patch from the >> below link and modified according to the current dmaengine framework. >> http://comments.gmane.org/gmane.linux.kernel.samsung-soc/23007 >> >> Cc: Dylan Reid <dgreid@chromium.org> >> Signed-off-by: Padmavathi Venna <padma.v@samsung.com> >> --- >> >> This patch has been tested for audio playback on exynos5420 peach-pit. >> >> drivers/dma/pl330.c | 67 >> +++++++++++++++++++++++++++++++++++++++++++++++++- >> 1 files changed, 65 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c >> index b7493d2..db880ae 100644 >> --- a/drivers/dma/pl330.c >> +++ b/drivers/dma/pl330.c >> @@ -2182,11 +2182,74 @@ static void pl330_free_chan_resources(struct >> dma_chan *chan) >> pm_runtime_put_autosuspend(pch->dmac->ddma.dev); >> } >> >> +static inline int >> +pl330_src_addr_in_desc(struct dma_pl330_desc *desc, unsigned int sar) >> +{ >> + return ((desc->px.src_addr <= sar) && >> + (sar <= (desc->px.src_addr + desc->px.bytes))); >> +} >> + >> +static inline int >> +pl330_dst_addr_in_desc(struct dma_pl330_desc *desc, unsigned int dar) >> +{ >> + return ((desc->px.dst_addr <= dar) && >> + (dar <= (desc->px.dst_addr + desc->px.bytes))); >> +} >> + >> static enum dma_status >> pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie, >> struct dma_tx_state *txstate) >> { >> - return dma_cookie_status(chan, cookie, txstate); >> + dma_addr_t sar, dar; >> + struct dma_pl330_chan *pch = to_pchan(chan); >> + void __iomem *regs = pch->dmac->base; >> + struct pl330_thread *thrd = pch->thread; >> + struct dma_pl330_desc *desc; >> + unsigned int residue = 0; >> + unsigned long flags; >> + bool first = true; >> + dma_cookie_t first_c, current_c; >> + dma_cookie_t used; >> + enum dma_status ret; >> + >> + ret = dma_cookie_status(chan, cookie, txstate); >> + if (ret == DMA_COMPLETE || !txstate) >> + return ret; >> + >> + used = txstate->used; >> + >> + spin_lock_irqsave(&pch->lock, flags); >> + sar = readl(regs + SA(thrd->id)); >> + dar = readl(regs + DA(thrd->id)); >> + >> + list_for_each_entry(desc, &pch->work_list, node) { >> + if (desc->status == BUSY) { >> + current_c = desc->txd.cookie; >> + if (first) { >> + first_c = desc->txd.cookie; >> + first = false; >> + } >> + >> + if (first_c < current_c) >> + residue += desc->px.bytes; >> + else { >> + if (desc->rqcfg.src_inc && pl330_src_addr_in_desc(desc, sar)) { >> + residue += desc->px.bytes; >> + residue -= sar - desc->px.src_addr; >> + } else if (desc->rqcfg.dst_inc && pl330_dst_addr_in_desc(desc, dar)) { >> + residue += desc->px.bytes; >> + residue -= dar - desc->px.dst_addr; >> + } >> + } >> + } else if (desc->status == PREP) >> + residue += desc->px.bytes; >> + >> + if (desc->txd.cookie == used) >> + break; >> + } >> + spin_unlock_irqrestore(&pch->lock, flags); >> + dma_set_residue(txstate, residue); >> + return ret; >> } >> >> static void pl330_issue_pending(struct dma_chan *chan) >> @@ -2631,7 +2694,7 @@ static int pl330_dma_device_slave_caps(struct dma_chan >> *dchan, >> caps->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); >> caps->cmd_pause = false; >> caps->cmd_terminate = true; >> - caps->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; >> + caps->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; >> >> return 0; >> } > > Any comment on this patch? Well it doesn't break audio, but I don't think it has the correct haviour for all cases yet. Again, the semantics are that it should return the progress of the transfer for which the allocation function returned the cookie that is passe to this function. You have to consider that there might be multiple different descriptors submitted and in the work list, not just the one we want to know the status of. The big problem with the pl330 driver is that the current structure of the driver makes it not so easy to implement the residue reporting correctly. - Lars
Hi Lars, [snip] >>> + >>> + ret = dma_cookie_status(chan, cookie, txstate); >>> + if (ret == DMA_COMPLETE || !txstate) >>> + return ret; >>> + >>> + used = txstate->used; >>> + >>> + spin_lock_irqsave(&pch->lock, flags); >>> + sar = readl(regs + SA(thrd->id)); >>> + dar = readl(regs + DA(thrd->id)); >>> + >>> + list_for_each_entry(desc, &pch->work_list, node) { >>> + if (desc->status == BUSY) { >>> + current_c = desc->txd.cookie; >>> + if (first) { >>> + first_c = desc->txd.cookie; >>> + first = false; >>> + } >>> + >>> + if (first_c < current_c) >>> + residue += desc->px.bytes; >>> + else { >>> + if (desc->rqcfg.src_inc && pl330_src_addr_in_desc(desc, sar)) { >>> + residue += desc->px.bytes; >>> + residue -= sar - desc->px.src_addr; >>> + } else if (desc->rqcfg.dst_inc && pl330_dst_addr_in_desc(desc, dar)) >>> { >>> + residue += desc->px.bytes; >>> + residue -= dar - desc->px.dst_addr; >>> + } >>> + } >>> + } else if (desc->status == PREP) >>> + residue += desc->px.bytes; >>> + >>> + if (desc->txd.cookie == used) >>> + break; >>> + } >>> + spin_unlock_irqrestore(&pch->lock, flags); >>> + dma_set_residue(txstate, residue); >>> + return ret; >>> } [snip] >> >> Any comment on this patch? > > Well it doesn't break audio, but I don't think it has the correct haviour > for all cases yet. OK. Any way of testing other cases like scatter-gather and memcopy. I verified memcopy in dmatest but it seems not doing anything with residue bytes. > > Again, the semantics are that it should return the progress of the transfer > > for which the allocation function returned the cookie that is passe to this May be my understanding is wrong. For clarification..In the snd_dmaengine_pcm_pointer it is subtracting the residue bytes from the total buffer bytes not from period bytes. So how it expects the progress of the transfer of the passed cookie which just holds period bytes? > > function. You have to consider that there might be multiple different > descriptors submitted and in the work list, not just the one we want to know Even though there are multiple descriptors in the work list, at a time only two descriptors are in busy state(as per the documentation in the driver) and all the descriptors cookie number is in incremental order. Not sure for other cases how it will be. > > the status of. The big problem with the pl330 driver is that the current > structure of the driver makes it not so easy to implement the residue > reporting correctly. > > - Lars Thanks Padma > >
On 3 December 2014 at 10:17, Padma Venkat <padma.kvr@gmail.com> wrote: > Hi Lars, > > [snip] >>>> + >>>> + ret = dma_cookie_status(chan, cookie, txstate); >>>> + if (ret == DMA_COMPLETE || !txstate) >>>> + return ret; >>>> + >>>> + used = txstate->used; >>>> + >>>> + spin_lock_irqsave(&pch->lock, flags); >>>> + sar = readl(regs + SA(thrd->id)); >>>> + dar = readl(regs + DA(thrd->id)); >>>> + >>>> + list_for_each_entry(desc, &pch->work_list, node) { >>>> + if (desc->status == BUSY) { >>>> + current_c = desc->txd.cookie; >>>> + if (first) { >>>> + first_c = desc->txd.cookie; >>>> + first = false; >>>> + } >>>> + >>>> + if (first_c < current_c) >>>> + residue += desc->px.bytes; >>>> + else { >>>> + if (desc->rqcfg.src_inc && pl330_src_addr_in_desc(desc, sar)) { >>>> + residue += desc->px.bytes; >>>> + residue -= sar - desc->px.src_addr; >>>> + } else if (desc->rqcfg.dst_inc && pl330_dst_addr_in_desc(desc, dar)) >>>> { >>>> + residue += desc->px.bytes; >>>> + residue -= dar - desc->px.dst_addr; >>>> + } >>>> + } >>>> + } else if (desc->status == PREP) >>>> + residue += desc->px.bytes; >>>> + >>>> + if (desc->txd.cookie == used) >>>> + break; >>>> + } >>>> + spin_unlock_irqrestore(&pch->lock, flags); >>>> + dma_set_residue(txstate, residue); >>>> + return ret; >>>> } > [snip] >>> >>> Any comment on this patch? >> >> Well it doesn't break audio, but I don't think it has the correct haviour >> for all cases yet. > > OK. Any way of testing other cases like scatter-gather and memcopy. I > verified memcopy in dmatest but it seems not doing anything with > residue bytes. > >> >> Again, the semantics are that it should return the progress of the transfer >> >> for which the allocation function returned the cookie that is passe to this > > May be my understanding is wrong. For clarification..In the > snd_dmaengine_pcm_pointer it is subtracting the residue bytes from the > total buffer bytes not from period bytes. So how it expects > the progress of the transfer of the passed cookie which just holds period bytes? > >> >> function. You have to consider that there might be multiple different >> descriptors submitted and in the work list, not just the one we want to know > > Even though there are multiple descriptors in the work list, at a time > only two descriptors are in busy state(as per the documentation in the > driver) and all the descriptors cookie number is in incremental order. > Not sure for other cases how it will be. > Yes. Tracing the history ... I think we could have done without 04abf5daf7d dma: pl330: Differentiate between submitted and issued descriptors The pl330 dmaengine driver currently does not differentiate between submitted and issued descriptors. It won't start transferring a newly submitted descriptor until issue_pending() is called, but only if it is idle. If it is active and a new descriptor is submitted before it goes idle it will happily start the newly submitted descriptor once all earlier submitted descriptors have been completed. This is not a 100% correct with regards to the dmaengine interface semantics. A descriptor is not supposed to be started until the next issue_pending() call after the descriptor has been submitted. because the reasoning above seems incorrect considering the following documentation... Documentation/crypto/async-tx-api.txt says " .... Once a driver-specific threshold is met the driver automatically issues pending operations. An application can force this event by calling async_tx_issue_pending_all(). ...." And include/linux/dmaengine.h says dma_async_tx_descriptor.tx_submit(): set the prepared descriptor(s) to be executed by the engine so theoretically a driver, not starting transfer until issue_pending(), is "broken". At best the patch@04abf5daf7d makes the driver slightly more complicated and the reason behind confusion such as in this thread. -jassi
On 12/03/2014 05:47 AM, Padma Venkat wrote: > Hi Lars, > > [snip] >>>> + >>>> + ret = dma_cookie_status(chan, cookie, txstate); >>>> + if (ret == DMA_COMPLETE || !txstate) >>>> + return ret; >>>> + >>>> + used = txstate->used; >>>> + >>>> + spin_lock_irqsave(&pch->lock, flags); >>>> + sar = readl(regs + SA(thrd->id)); >>>> + dar = readl(regs + DA(thrd->id)); >>>> + >>>> + list_for_each_entry(desc, &pch->work_list, node) { >>>> + if (desc->status == BUSY) { >>>> + current_c = desc->txd.cookie; >>>> + if (first) { >>>> + first_c = desc->txd.cookie; >>>> + first = false; >>>> + } >>>> + >>>> + if (first_c < current_c) >>>> + residue += desc->px.bytes; >>>> + else { >>>> + if (desc->rqcfg.src_inc && pl330_src_addr_in_desc(desc, sar)) { >>>> + residue += desc->px.bytes; >>>> + residue -= sar - desc->px.src_addr; >>>> + } else if (desc->rqcfg.dst_inc && pl330_dst_addr_in_desc(desc, dar)) >>>> { >>>> + residue += desc->px.bytes; >>>> + residue -= dar - desc->px.dst_addr; >>>> + } >>>> + } >>>> + } else if (desc->status == PREP) >>>> + residue += desc->px.bytes; >>>> + >>>> + if (desc->txd.cookie == used) >>>> + break; >>>> + } >>>> + spin_unlock_irqrestore(&pch->lock, flags); >>>> + dma_set_residue(txstate, residue); >>>> + return ret; >>>> } > [snip] >>> >>> Any comment on this patch? >> >> Well it doesn't break audio, but I don't think it has the correct haviour >> for all cases yet. > > OK. Any way of testing other cases like scatter-gather and memcopy. I > verified memcopy in dmatest but it seems not doing anything with > residue bytes. E.g. I think your current patch fails if more than one transfer has been submitted. In that case you'll count the bytes for all of them rather than just the one requested. > >> >> Again, the semantics are that it should return the progress of the transfer >> >> for which the allocation function returned the cookie that is passe to this > > May be my understanding is wrong. For clarification..In the > snd_dmaengine_pcm_pointer it is subtracting the residue bytes from the > total buffer bytes not from period bytes. So how it expects > the progress of the transfer of the passed cookie which just holds period bytes? The issue that makes implementing this correctly for the pl330 driver complicated is that the driver allocates one cookie per segment/period, but the external API works with one cookie per transfer. All those cookies that are assigned to the segments/periods that are not the first in a transfer are not externally visible. dmaengine_prep_slave_sg() and friends create a transfer and return descriptor for this transfer with a single cookie. If that cookie is passed to dmaengine_tx_status() the function is supposed to report the progress on that one transfer that was previously allocated and returned that cookie number. residue is the total number of bytes that are still left to to be processed for that transfer. I've started reworking the pl330 driver to only have a single cookie per transfer, instead of one per period/segment. This makes implementing the residue reporting a lot easier. I never finished that work though, but I can try to see if I can revive it next week. - Lars
On Tue, Dec 02, 2014 at 06:25:49PM +0100, Lars-Peter Clausen wrote: > On 12/02/2014 06:38 AM, Padma Venkat wrote: > > Well it doesn't break audio, but I don't think it has the correct > haviour for all cases yet. > > Again, the semantics are that it should return the progress of the > transfer for which the allocation function returned the cookie that > is passe to this function. You have to consider that there might be > multiple different descriptors submitted and in the work list, not > just the one we want to know the status of. The big problem with the > pl330 driver is that the current structure of the driver makes it > not so easy to implement the residue reporting correctly. well you can use the completed_cookie as hint. This was last trasaction which was issues, so calculate remianing bytes for this and subsequent ones resiude would only be size of transaction
On Wed, Dec 03, 2014 at 01:21:37PM +0530, Jassi Brar wrote: > On 3 December 2014 at 10:17, Padma Venkat <padma.kvr@gmail.com> wrote: > > Hi Lars, > > > > [snip] > >>>> + > >>>> + ret = dma_cookie_status(chan, cookie, txstate); > >>>> + if (ret == DMA_COMPLETE || !txstate) > >>>> + return ret; > >>>> + > >>>> + used = txstate->used; > >>>> + > >>>> + spin_lock_irqsave(&pch->lock, flags); > >>>> + sar = readl(regs + SA(thrd->id)); > >>>> + dar = readl(regs + DA(thrd->id)); > >>>> + > >>>> + list_for_each_entry(desc, &pch->work_list, node) { > >>>> + if (desc->status == BUSY) { > >>>> + current_c = desc->txd.cookie; > >>>> + if (first) { > >>>> + first_c = desc->txd.cookie; > >>>> + first = false; > >>>> + } > >>>> + > >>>> + if (first_c < current_c) > >>>> + residue += desc->px.bytes; > >>>> + else { > >>>> + if (desc->rqcfg.src_inc && pl330_src_addr_in_desc(desc, sar)) { > >>>> + residue += desc->px.bytes; > >>>> + residue -= sar - desc->px.src_addr; > >>>> + } else if (desc->rqcfg.dst_inc && pl330_dst_addr_in_desc(desc, dar)) > >>>> { > >>>> + residue += desc->px.bytes; > >>>> + residue -= dar - desc->px.dst_addr; > >>>> + } > >>>> + } > >>>> + } else if (desc->status == PREP) > >>>> + residue += desc->px.bytes; > >>>> + > >>>> + if (desc->txd.cookie == used) > >>>> + break; > >>>> + } > >>>> + spin_unlock_irqrestore(&pch->lock, flags); > >>>> + dma_set_residue(txstate, residue); > >>>> + return ret; > >>>> } > > [snip] > >>> > >>> Any comment on this patch? > >> > >> Well it doesn't break audio, but I don't think it has the correct haviour > >> for all cases yet. > > > > OK. Any way of testing other cases like scatter-gather and memcopy. I > > verified memcopy in dmatest but it seems not doing anything with > > residue bytes. > > > >> > >> Again, the semantics are that it should return the progress of the transfer > >> > >> for which the allocation function returned the cookie that is passe to this > > > > May be my understanding is wrong. For clarification..In the > > snd_dmaengine_pcm_pointer it is subtracting the residue bytes from the > > total buffer bytes not from period bytes. So how it expects > > the progress of the transfer of the passed cookie which just holds period bytes? > > > >> > >> function. You have to consider that there might be multiple different > >> descriptors submitted and in the work list, not just the one we want to know > > > > Even though there are multiple descriptors in the work list, at a time > > only two descriptors are in busy state(as per the documentation in the > > driver) and all the descriptors cookie number is in incremental order. > > Not sure for other cases how it will be. > > > Yes. > > Tracing the history ... I think we could have done without > > 04abf5daf7d dma: pl330: Differentiate between submitted and issued descriptors > > The pl330 dmaengine driver currently does not differentiate > between submitted > and issued descriptors. It won't start transferring a newly submitted > descriptor until issue_pending() is called, but only if it is idle. If it is > active and a new descriptor is submitted before it goes idle it will happily > start the newly submitted descriptor once all earlier submitted > descriptors have > been completed. This is not a 100% correct with regards to the dmaengine > interface semantics. A descriptor is not supposed to be started > until the next > issue_pending() call after the descriptor has been submitted. > > > because the reasoning above seems incorrect considering the following > documentation... > > Documentation/crypto/async-tx-api.txt says > " .... Once a driver-specific threshold is met the driver > automatically issues pending operations. An application can force this > event by calling async_tx_issue_pending_all(). ...." > > And > > include/linux/dmaengine.h says > dma_async_tx_descriptor.tx_submit(): set the prepared descriptor(s) > to be executed by the engine "to be" here can lead to different conculsion. I will reword this :) @tx_submit: accept the descriptor and assign ordered cookie and mark the descriptor pending. To be pushed on .issue_pending() call
On Wed, Dec 03, 2014 at 01:21:37PM +0530, Jassi Brar wrote: > because the reasoning above seems incorrect considering the following > documentation... > > Documentation/crypto/async-tx-api.txt says ^^^^^^^^^^^^ async-tx-api is not the DMA slave API. > " .... Once a driver-specific threshold is met the driver > automatically issues pending operations. An application can force this > event by calling async_tx_issue_pending_all(). ...." ^^^^^^^^^^^^^^^^^^^^^^^^^^ DMA slave users do not call this function. This documentation is not relevant for DMA slave. > include/linux/dmaengine.h says > dma_async_tx_descriptor.tx_submit(): set the prepared descriptor(s) > to be executed by the engine It doesn't say when. > so theoretically a driver, not starting transfer until > issue_pending(), is "broken". It isn't. DMA slave engine implementations have been needing the issue_pending() call since their dawn, so it's something that they've always needed. > At best the patch@04abf5daf7d makes the driver slightly more > complicated and the reason behind confusion such as in this thread. That may be, and yes, it _might_ be worth discussing whether this should be relaxed or not, but that should be done as a proposal, not trying to hide it as a bug fix. It isn't.
On 5 December 2014 at 20:48, Russell King - ARM Linux <linux@arm.linux.org.uk> wrote: > On Wed, Dec 03, 2014 at 01:21:37PM +0530, Jassi Brar wrote: >> because the reasoning above seems incorrect considering the following >> documentation... >> >> Documentation/crypto/async-tx-api.txt says > ^^^^^^^^^^^^ > > async-tx-api is not the DMA slave API. > >> " .... Once a driver-specific threshold is met the driver >> automatically issues pending operations. An application can force this >> event by calling async_tx_issue_pending_all(). ...." > ^^^^^^^^^^^^^^^^^^^^^^^^^^ > > DMA slave users do not call this function. This documentation is not > relevant for DMA slave. > The APIs (device_issue_pending, tx_submit etc) are same for Slave and Async. async_tx_issue_pending_all() for Async and dma_async_issue_pending() for Slave both boil down to device_issue_pending() >> include/linux/dmaengine.h says >> dma_async_tx_descriptor.tx_submit(): set the prepared descriptor(s) >> to be executed by the engine > > It doesn't say when. > :) OK >> so theoretically a driver, not starting transfer until >> issue_pending(), is "broken". > > It isn't. DMA slave engine implementations have been needing the > issue_pending() call since their dawn, so it's something that they've > always needed. > >> At best the patch@04abf5daf7d makes the driver slightly more >> complicated and the reason behind confusion such as in this thread. > > That may be, and yes, it _might_ be worth discussing whether this should > be relaxed or not, but that should be done as a proposal, not trying to > hide it as a bug fix. It isn't. > It does, though, create an "awkward situation" when a channel is active while new requests are submitted - why would the channel want to stop after current transfer and await fresh issue_pending()? It's not that the request can be modified after submission. And it isn't that tx_submit() is meant for 'sleepable' preparation for the transfer and we need another call to be issued from atomic context. From what I see most drivers don't need to sleep in tx_submit(). In fact, from a quick look most clients seem to suffer from the ritual i.e, there's nothing between tx_submit() and issue_pending() calls. And when there is indeed some code, it seems that can be moved just before tx_submit(). Last and apparently the least of all, we can never enforce the same behavior of the api on Async dma and have uniform behavior over the api. So, if all tx_submit() does is put the request in controller driver's internal queue and the client can't touch the request after tx_submit(), why not merge the tx_submit() and issue_pending()? Thanks.
On Sat, Dec 06, 2014 at 12:31:01PM +0530, Jassi Brar wrote: > It does, though, create an "awkward situation" when a channel is > active while new requests are submitted - why would the channel want > to stop after current transfer and await fresh issue_pending()? It's > not that the request can be modified after submission. > > And it isn't that tx_submit() is meant for 'sleepable' preparation for > the transfer and we need another call to be issued from atomic > context. From what I see most drivers don't need to sleep in > tx_submit(). In fact, from a quick look most clients seem to suffer > from the ritual i.e, there's nothing between tx_submit() and > issue_pending() calls. And when there is indeed some code, it seems > that can be moved just before tx_submit(). > > Last and apparently the least of all, we can never enforce the same > behavior of the api on Async dma and have uniform behavior over the > api. > > So, if all tx_submit() does is put the request in controller driver's > internal queue and the client can't touch the request after > tx_submit(), why not merge the tx_submit() and issue_pending()? You are thinking from an API point and not what can be done with this API. Today this API allows you to collate all pending txn and start while dmaengine driver can collate and submit as a batch to hardware and not waste time in getting irq and then setting next one. Sadly none of the drivers use this feature... I actually like the split model, you can also prepare txn ahead of time and submit them when needed. If you don't like this, then please do as most implementation do today, call prepare and submit in series. Flexiblity in API is a better option IMO Thanks
On Mon, Dec 08, 2014 at 06:37:27PM +0530, Vinod Koul wrote: > I actually like the split model, you can also prepare txn ahead of time and > submit them when needed. Actually, you can't - that's not permitted. I have email(s) from Dan explicitly stating that it is permitted for a driver to take a spinlock in their prepare callback, and release it when the descriptor is submitted. Several DMA engine drivers (particularly those in for async_tx) do exactly that. The reason that submit is separate from prepare is to allow DMA engine users to set a callback - if it weren't for that, there wouldn't be a submit step, prepare would have done everything.
On Mon, Dec 08, 2014 at 02:23:21PM +0000, Russell King - ARM Linux wrote: > On Mon, Dec 08, 2014 at 06:37:27PM +0530, Vinod Koul wrote: > > I actually like the split model, you can also prepare txn ahead of time and > > submit them when needed. > > Actually, you can't - that's not permitted. I have email(s) from Dan > explicitly stating that it is permitted for a driver to take a spinlock > in their prepare callback, and release it when the descriptor is > submitted. Several DMA engine drivers (particularly those in for > async_tx) do exactly that. > > The reason that submit is separate from prepare is to allow DMA engine > users to set a callback - if it weren't for that, there wouldn't be a > submit step, prepare would have done everything. Yes thats right. Do you mind pointing to thread Dan replied, I would like to add these bits and anything else missing to Documentation Thanks
On 8 December 2014 at 18:37, Vinod Koul <vinod.koul@intel.com> wrote: > On Sat, Dec 06, 2014 at 12:31:01PM +0530, Jassi Brar wrote: >> It does, though, create an "awkward situation" when a channel is >> active while new requests are submitted - why would the channel want >> to stop after current transfer and await fresh issue_pending()? It's >> not that the request can be modified after submission. >> >> And it isn't that tx_submit() is meant for 'sleepable' preparation for >> the transfer and we need another call to be issued from atomic >> context. From what I see most drivers don't need to sleep in >> tx_submit(). In fact, from a quick look most clients seem to suffer >> from the ritual i.e, there's nothing between tx_submit() and >> issue_pending() calls. And when there is indeed some code, it seems >> that can be moved just before tx_submit(). >> >> Last and apparently the least of all, we can never enforce the same >> behavior of the api on Async dma and have uniform behavior over the >> api. >> >> So, if all tx_submit() does is put the request in controller driver's >> internal queue and the client can't touch the request after >> tx_submit(), why not merge the tx_submit() and issue_pending()? > You are thinking from an API point and not what can be done with this API. > "awkward situation" and "ritual suffering" above, are two practical issues that we see. > Today this API allows you to collate all pending txn and start while > dmaengine driver can collate and submit as a batch to hardware and not waste > time in getting irq and then setting next one. Sadly none of the drivers use > this feature... > So we at least agree that the "feature" is unused so far. And I doubt if it would ever make sense to batch transfers in slave dma (unlike offloading on async-dma) because the user has to also setup the master for each queued request, mostly if not always. > I actually like the split model, you can also prepare txn ahead of time and > submit them when needed. If you don't like this, then please do as most > implementation do today, call prepare and submit in series. Flexiblity in > API is a better option IMO > As Russell pointed out, that ain't the case either. So we are yet to figure out benefits of having explicit issue_pending() after tx_submit(). -Jassi
On Tue, Dec 09, 2014 at 08:48:04PM +0530, Jassi Brar wrote: > As Russell pointed out, that ain't the case either. > So we are yet to figure out benefits of having explicit > issue_pending() after tx_submit(). callback ?
On 11 December 2014 at 10:17, Vinod Koul <vinod.koul@intel.com> wrote: > On Tue, Dec 09, 2014 at 08:48:04PM +0530, Jassi Brar wrote: >> As Russell pointed out, that ain't the case either. >> So we are yet to figure out benefits of having explicit >> issue_pending() after tx_submit(). > callback ? > The callback is set after prep() and before tx_submit(), but here we talk after tx_submit().
Hi Vinod, Hi Russell, On 11 December 2014 at 11:42, Jassi Brar <jaswinder.singh@linaro.org> wrote: > On 11 December 2014 at 10:17, Vinod Koul <vinod.koul@intel.com> wrote: >> On Tue, Dec 09, 2014 at 08:48:04PM +0530, Jassi Brar wrote: >>> As Russell pointed out, that ain't the case either. >>> So we are yet to figure out benefits of having explicit >>> issue_pending() after tx_submit(). >> callback ? >> > The callback is set after prep() and before tx_submit(), but here we > talk after tx_submit(). Perhaps the idea dates back to async-only days, when client drivers would prepare and queue descriptors in the controller driver rather than having to manage the dependency queues themselves (?). Today ~95% clients are slave and I am yet to find one that really can't work with submit and issue_pending tied together. Not to forget the 100% of the controller drivers have to manage 'submitted' and 'active' queues -- only to have arguably negative side-effects. If we agree that clubbing submit and issue_pending is the right thing to do, I can start converting the ~90 client drivers. Please let me know either way. Cheers!
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index b7493d2..db880ae 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2182,11 +2182,74 @@ static void pl330_free_chan_resources(struct dma_chan *chan) pm_runtime_put_autosuspend(pch->dmac->ddma.dev); } +static inline int +pl330_src_addr_in_desc(struct dma_pl330_desc *desc, unsigned int sar) +{ + return ((desc->px.src_addr <= sar) && + (sar <= (desc->px.src_addr + desc->px.bytes))); +} + +static inline int +pl330_dst_addr_in_desc(struct dma_pl330_desc *desc, unsigned int dar) +{ + return ((desc->px.dst_addr <= dar) && + (dar <= (desc->px.dst_addr + desc->px.bytes))); +} + static enum dma_status pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie, struct dma_tx_state *txstate) { - return dma_cookie_status(chan, cookie, txstate); + dma_addr_t sar, dar; + struct dma_pl330_chan *pch = to_pchan(chan); + void __iomem *regs = pch->dmac->base; + struct pl330_thread *thrd = pch->thread; + struct dma_pl330_desc *desc; + unsigned int residue = 0; + unsigned long flags; + bool first = true; + dma_cookie_t first_c, current_c; + dma_cookie_t used; + enum dma_status ret; + + ret = dma_cookie_status(chan, cookie, txstate); + if (ret == DMA_COMPLETE || !txstate) + return ret; + + used = txstate->used; + + spin_lock_irqsave(&pch->lock, flags); + sar = readl(regs + SA(thrd->id)); + dar = readl(regs + DA(thrd->id)); + + list_for_each_entry(desc, &pch->work_list, node) { + if (desc->status == BUSY) { + current_c = desc->txd.cookie; + if (first) { + first_c = desc->txd.cookie; + first = false; + } + + if (first_c < current_c) + residue += desc->px.bytes; + else { + if (desc->rqcfg.src_inc && pl330_src_addr_in_desc(desc, sar)) { + residue += desc->px.bytes; + residue -= sar - desc->px.src_addr; + } else if (desc->rqcfg.dst_inc && pl330_dst_addr_in_desc(desc, dar)) { + residue += desc->px.bytes; + residue -= dar - desc->px.dst_addr; + } + } + } else if (desc->status == PREP) + residue += desc->px.bytes; + + if (desc->txd.cookie == used) + break; + } + spin_unlock_irqrestore(&pch->lock, flags); + dma_set_residue(txstate, residue); + return ret; } static void pl330_issue_pending(struct dma_chan *chan) @@ -2631,7 +2694,7 @@ static int pl330_dma_device_slave_caps(struct dma_chan *dchan, caps->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); caps->cmd_pause = false; caps->cmd_terminate = true; - caps->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + caps->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; return 0; }
Fill txstate.residue with the amount of bytes remaining in the current transfer if the transfer is not complete. This will be of particular use to i2s DMA transfers, providing more accurate hw_ptr values to ASoC. I had taken the code from Dylan Reid <dgreid@chromium.org> patch from the below link and modified according to the current dmaengine framework. http://comments.gmane.org/gmane.linux.kernel.samsung-soc/23007 Cc: Dylan Reid <dgreid@chromium.org> Signed-off-by: Padmavathi Venna <padma.v@samsung.com> --- This patch has been tested for audio playback on exynos5420 peach-pit. drivers/dma/pl330.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 65 insertions(+), 2 deletions(-)