
dmaengine: pl330: Set residue in tx_status callback

Message ID 1416995095-13763-1-git-send-email-padma.v@samsung.com (mailing list archive)
State New, archived

Commit Message

Padmavathi Venna Nov. 26, 2014, 9:44 a.m. UTC
Fill txstate.residue with the number of bytes remaining in the current
transfer if the transfer is not complete.  This will be of particular
use to I2S DMA transfers, providing more accurate hw_ptr values to ASoC.

I took the code from Dylan Reid's <dgreid@chromium.org> patch at the
link below and modified it according to the current dmaengine framework.
http://comments.gmane.org/gmane.linux.kernel.samsung-soc/23007

Cc: Dylan Reid <dgreid@chromium.org>
Signed-off-by: Padmavathi Venna <padma.v@samsung.com>
---

This patch has been tested for audio playback on exynos5420 peach-pit.

 drivers/dma/pl330.c |   67 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 65 insertions(+), 2 deletions(-)

Comments

padma venkat Dec. 2, 2014, 5:38 a.m. UTC | #1
Hi Vinod/Lars,

On 11/26/14, Padmavathi Venna <padma.v@samsung.com> wrote:
> Fill txstate.residue with the number of bytes remaining in the current
> transfer if the transfer is not complete.  This will be of particular
> use to I2S DMA transfers, providing more accurate hw_ptr values to ASoC.
>
> I took the code from Dylan Reid's <dgreid@chromium.org> patch at the
> link below and modified it according to the current dmaengine framework.
> http://comments.gmane.org/gmane.linux.kernel.samsung-soc/23007
>
> Cc: Dylan Reid <dgreid@chromium.org>
> Signed-off-by: Padmavathi Venna <padma.v@samsung.com>
> ---
>
> This patch has been tested for audio playback on exynos5420 peach-pit.
>
>  drivers/dma/pl330.c |   67
> +++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 files changed, 65 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
> index b7493d2..db880ae 100644
> --- a/drivers/dma/pl330.c
> +++ b/drivers/dma/pl330.c
> @@ -2182,11 +2182,74 @@ static void pl330_free_chan_resources(struct
> dma_chan *chan)
>  	pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
>  }
>
> +static inline int
> +pl330_src_addr_in_desc(struct dma_pl330_desc *desc, unsigned int sar)
> +{
> +	return ((desc->px.src_addr <= sar) &&
> +		(sar <= (desc->px.src_addr + desc->px.bytes)));
> +}
> +
> +static inline int
> +pl330_dst_addr_in_desc(struct dma_pl330_desc *desc, unsigned int dar)
> +{
> +	return ((desc->px.dst_addr <= dar) &&
> +		(dar <= (desc->px.dst_addr + desc->px.bytes)));
> +}
> +
>  static enum dma_status
>  pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
>  		 struct dma_tx_state *txstate)
>  {
> -	return dma_cookie_status(chan, cookie, txstate);
> +	dma_addr_t sar, dar;
> +	struct dma_pl330_chan *pch = to_pchan(chan);
> +	void __iomem *regs = pch->dmac->base;
> +	struct pl330_thread *thrd = pch->thread;
> +	struct dma_pl330_desc *desc;
> +	unsigned int residue = 0;
> +	unsigned long flags;
> +	bool first = true;
> +	dma_cookie_t first_c, current_c;
> +	dma_cookie_t used;
> +	enum dma_status ret;
> +
> +	ret = dma_cookie_status(chan, cookie, txstate);
> +	if (ret == DMA_COMPLETE || !txstate)
> +		return ret;
> +
> +	used = txstate->used;
> +
> +	spin_lock_irqsave(&pch->lock, flags);
> +	sar = readl(regs + SA(thrd->id));
> +	dar = readl(regs + DA(thrd->id));
> +
> +	list_for_each_entry(desc, &pch->work_list, node) {
> +		if (desc->status == BUSY) {
> +			current_c = desc->txd.cookie;
> +			if (first) {
> +				first_c = desc->txd.cookie;
> +				first = false;
> +			}
> +
> +			if (first_c < current_c)
> +				residue += desc->px.bytes;
> +			else {
> +				if (desc->rqcfg.src_inc && pl330_src_addr_in_desc(desc, sar)) {
> +					residue += desc->px.bytes;
> +					residue -= sar - desc->px.src_addr;
> +				} else if (desc->rqcfg.dst_inc && pl330_dst_addr_in_desc(desc, dar)) {
> +					residue += desc->px.bytes;
> +					residue -= dar - desc->px.dst_addr;
> +				}
> +			}
> +		} else if (desc->status == PREP)
> +			residue += desc->px.bytes;
> +
> +		if (desc->txd.cookie == used)
> +			break;
> +	}
> +	spin_unlock_irqrestore(&pch->lock, flags);
> +	dma_set_residue(txstate, residue);
> +	return ret;
>  }
>
>  static void pl330_issue_pending(struct dma_chan *chan)
> @@ -2631,7 +2694,7 @@ static int pl330_dma_device_slave_caps(struct dma_chan
> *dchan,
>  	caps->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
>  	caps->cmd_pause = false;
>  	caps->cmd_terminate = true;
> -	caps->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
> +	caps->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
>
>  	return 0;
>  }

Any comment on this patch?

Thanks
Padma
> --
> 1.7.4.4
Lars-Peter Clausen Dec. 2, 2014, 5:25 p.m. UTC | #2
On 12/02/2014 06:38 AM, Padma Venkat wrote:
> Hi Vinod/Lars,
>
> On 11/26/14, Padmavathi Venna <padma.v@samsung.com> wrote:
>> Fill txstate.residue with the number of bytes remaining in the current
>> transfer if the transfer is not complete.  This will be of particular
>> use to I2S DMA transfers, providing more accurate hw_ptr values to ASoC.
>>
>> I took the code from Dylan Reid's <dgreid@chromium.org> patch at the
>> link below and modified it according to the current dmaengine framework.
>> http://comments.gmane.org/gmane.linux.kernel.samsung-soc/23007
>>
>> Cc: Dylan Reid <dgreid@chromium.org>
>> Signed-off-by: Padmavathi Venna <padma.v@samsung.com>
>> ---
>>
>> This patch has been tested for audio playback on exynos5420 peach-pit.
>>
>>   drivers/dma/pl330.c |   67
>> +++++++++++++++++++++++++++++++++++++++++++++++++-
>>   1 files changed, 65 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
>> index b7493d2..db880ae 100644
>> --- a/drivers/dma/pl330.c
>> +++ b/drivers/dma/pl330.c
>> @@ -2182,11 +2182,74 @@ static void pl330_free_chan_resources(struct
>> dma_chan *chan)
>>   	pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
>>   }
>>
>> +static inline int
>> +pl330_src_addr_in_desc(struct dma_pl330_desc *desc, unsigned int sar)
>> +{
>> +	return ((desc->px.src_addr <= sar) &&
>> +		(sar <= (desc->px.src_addr + desc->px.bytes)));
>> +}
>> +
>> +static inline int
>> +pl330_dst_addr_in_desc(struct dma_pl330_desc *desc, unsigned int dar)
>> +{
>> +	return ((desc->px.dst_addr <= dar) &&
>> +		(dar <= (desc->px.dst_addr + desc->px.bytes)));
>> +}
>> +
>>   static enum dma_status
>>   pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
>>   		 struct dma_tx_state *txstate)
>>   {
>> -	return dma_cookie_status(chan, cookie, txstate);
>> +	dma_addr_t sar, dar;
>> +	struct dma_pl330_chan *pch = to_pchan(chan);
>> +	void __iomem *regs = pch->dmac->base;
>> +	struct pl330_thread *thrd = pch->thread;
>> +	struct dma_pl330_desc *desc;
>> +	unsigned int residue = 0;
>> +	unsigned long flags;
>> +	bool first = true;
>> +	dma_cookie_t first_c, current_c;
>> +	dma_cookie_t used;
>> +	enum dma_status ret;
>> +
>> +	ret = dma_cookie_status(chan, cookie, txstate);
>> +	if (ret == DMA_COMPLETE || !txstate)
>> +		return ret;
>> +
>> +	used = txstate->used;
>> +
>> +	spin_lock_irqsave(&pch->lock, flags);
>> +	sar = readl(regs + SA(thrd->id));
>> +	dar = readl(regs + DA(thrd->id));
>> +
>> +	list_for_each_entry(desc, &pch->work_list, node) {
>> +		if (desc->status == BUSY) {
>> +			current_c = desc->txd.cookie;
>> +			if (first) {
>> +				first_c = desc->txd.cookie;
>> +				first = false;
>> +			}
>> +
>> +			if (first_c < current_c)
>> +				residue += desc->px.bytes;
>> +			else {
>> +				if (desc->rqcfg.src_inc && pl330_src_addr_in_desc(desc, sar)) {
>> +					residue += desc->px.bytes;
>> +					residue -= sar - desc->px.src_addr;
>> +				} else if (desc->rqcfg.dst_inc && pl330_dst_addr_in_desc(desc, dar)) {
>> +					residue += desc->px.bytes;
>> +					residue -= dar - desc->px.dst_addr;
>> +				}
>> +			}
>> +		} else if (desc->status == PREP)
>> +			residue += desc->px.bytes;
>> +
>> +		if (desc->txd.cookie == used)
>> +			break;
>> +	}
>> +	spin_unlock_irqrestore(&pch->lock, flags);
>> +	dma_set_residue(txstate, residue);
>> +	return ret;
>>   }
>>
>>   static void pl330_issue_pending(struct dma_chan *chan)
>> @@ -2631,7 +2694,7 @@ static int pl330_dma_device_slave_caps(struct dma_chan
>> *dchan,
>>   	caps->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
>>   	caps->cmd_pause = false;
>>   	caps->cmd_terminate = true;
>> -	caps->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
>> +	caps->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
>>
>>   	return 0;
>>   }
>
> Any comment on this patch?

Well, it doesn't break audio, but I don't think it has the correct behaviour
for all cases yet.

Again, the semantics are that it should return the progress of the transfer
for which the allocation function returned the cookie that is passed to this
function. You have to consider that there might be multiple different
descriptors submitted and in the work list, not just the one we want to know
the status of. The big problem with the pl330 driver is that the current
structure of the driver makes it not so easy to implement the residue
reporting correctly.
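
For illustration, the contract is roughly this (a minimal client-side
sketch using the generic slave-DMA helpers, not code from this patch;
chan, buf and len are placeholders):

    struct dma_async_tx_descriptor *desc;
    struct dma_tx_state state;
    dma_cookie_t cookie;

    /* One prep call creates one transfer; submit returns its cookie. */
    desc = dmaengine_prep_slave_single(chan, buf, len, DMA_MEM_TO_DEV,
                                       DMA_PREP_INTERRUPT);
    cookie = dmaengine_submit(desc);
    dma_async_issue_pending(chan);

    /* Later, even with other transfers queued on the same channel,
     * tx_status() must report the residue of *this* transfer only. */
    if (dmaengine_tx_status(chan, cookie, &state) != DMA_COMPLETE)
        pr_debug("bytes left in this transfer: %u\n", state.residue);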

- Lars
padma venkat Dec. 3, 2014, 4:47 a.m. UTC | #3
Hi Lars,

[snip]
>>> +
>>> +	ret = dma_cookie_status(chan, cookie, txstate);
>>> +	if (ret == DMA_COMPLETE || !txstate)
>>> +		return ret;
>>> +
>>> +	used = txstate->used;
>>> +
>>> +	spin_lock_irqsave(&pch->lock, flags);
>>> +	sar = readl(regs + SA(thrd->id));
>>> +	dar = readl(regs + DA(thrd->id));
>>> +
>>> +	list_for_each_entry(desc, &pch->work_list, node) {
>>> +		if (desc->status == BUSY) {
>>> +			current_c = desc->txd.cookie;
>>> +			if (first) {
>>> +				first_c = desc->txd.cookie;
>>> +				first = false;
>>> +			}
>>> +
>>> +			if (first_c < current_c)
>>> +				residue += desc->px.bytes;
>>> +			else {
>>> +				if (desc->rqcfg.src_inc && pl330_src_addr_in_desc(desc, sar)) {
>>> +					residue += desc->px.bytes;
>>> +					residue -= sar - desc->px.src_addr;
>>> +				} else if (desc->rqcfg.dst_inc && pl330_dst_addr_in_desc(desc, dar))
>>> {
>>> +					residue += desc->px.bytes;
>>> +					residue -= dar - desc->px.dst_addr;
>>> +				}
>>> +			}
>>> +		} else if (desc->status == PREP)
>>> +			residue += desc->px.bytes;
>>> +
>>> +		if (desc->txd.cookie == used)
>>> +			break;
>>> +	}
>>> +	spin_unlock_irqrestore(&pch->lock, flags);
>>> +	dma_set_residue(txstate, residue);
>>> +	return ret;
>>>   }
[snip]
>>
>> Any comment on this patch?
>
>> Well, it doesn't break audio, but I don't think it has the correct behaviour
>> for all cases yet.

OK. Is there any way of testing the other cases like scatter-gather and
memcpy?  I verified memcpy in dmatest, but it does not seem to do
anything with the residue bytes.

>
> Again, the semantics are that it should return the progress of the transfer
> for which the allocation function returned the cookie that is passed to this

Maybe my understanding is wrong. For clarification: in
snd_dmaengine_pcm_pointer() the residue bytes are subtracted from the
total buffer bytes, not from the period bytes. So how can it report
the progress of the transfer for the passed cookie, which only covers period bytes?
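
For reference, the computation in question is roughly the following (a
simplified paraphrase of snd_dmaengine_pcm_pointer(), not the verbatim
source; prtd is the dmaengine PCM runtime data):

    struct dma_tx_state state;
    unsigned int buf_bytes = snd_pcm_lib_buffer_bytes(substream);
    unsigned int pos = 0;

    dmaengine_tx_status(prtd->dma_chan, prtd->cookie, &state);
    /* The residue is applied against the whole cyclic buffer, so it is
     * expected to cover all remaining bytes of the buffer, not just
     * the remainder of one period. */
    if (state.residue > 0 && state.residue <= buf_bytes)
        pos = buf_bytes - state.residue;

    return bytes_to_frames(substream->runtime, pos);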

>
> function. You have to consider that there might be multiple different
> descriptors submitted and in the work list, not just the one we want to know

Even though there are multiple descriptors in the work list, only two
descriptors are in the BUSY state at a time (as per the documentation in the
driver), and the descriptors' cookie numbers are in increasing order.
I am not sure how it will be for other cases.

>
> the status of. The big problem with the pl330 driver is that the current
> structure of the driver makes it not so easy to implement the residue
> reporting correctly.
>
> - Lars

Thanks
Padma
>
>
Jassi Brar Dec. 3, 2014, 7:51 a.m. UTC | #4
On 3 December 2014 at 10:17, Padma Venkat <padma.kvr@gmail.com> wrote:
> Hi Lars,
>
> [snip]
>>>> +
>>>> +   ret = dma_cookie_status(chan, cookie, txstate);
>>>> +   if (ret == DMA_COMPLETE || !txstate)
>>>> +           return ret;
>>>> +
>>>> +   used = txstate->used;
>>>> +
>>>> +   spin_lock_irqsave(&pch->lock, flags);
>>>> +   sar = readl(regs + SA(thrd->id));
>>>> +   dar = readl(regs + DA(thrd->id));
>>>> +
>>>> +   list_for_each_entry(desc, &pch->work_list, node) {
>>>> +           if (desc->status == BUSY) {
>>>> +                   current_c = desc->txd.cookie;
>>>> +                   if (first) {
>>>> +                           first_c = desc->txd.cookie;
>>>> +                           first = false;
>>>> +                   }
>>>> +
>>>> +                   if (first_c < current_c)
>>>> +                           residue += desc->px.bytes;
>>>> +                   else {
>>>> +                           if (desc->rqcfg.src_inc && pl330_src_addr_in_desc(desc, sar)) {
>>>> +                                   residue += desc->px.bytes;
>>>> +                                   residue -= sar - desc->px.src_addr;
>>>> +                           } else if (desc->rqcfg.dst_inc && pl330_dst_addr_in_desc(desc, dar))
>>>> {
>>>> +                                   residue += desc->px.bytes;
>>>> +                                   residue -= dar - desc->px.dst_addr;
>>>> +                           }
>>>> +                   }
>>>> +           } else if (desc->status == PREP)
>>>> +                   residue += desc->px.bytes;
>>>> +
>>>> +           if (desc->txd.cookie == used)
>>>> +                   break;
>>>> +   }
>>>> +   spin_unlock_irqrestore(&pch->lock, flags);
>>>> +   dma_set_residue(txstate, residue);
>>>> +   return ret;
>>>>   }
> [snip]
>>>
>>> Any comment on this patch?
>>
>> Well, it doesn't break audio, but I don't think it has the correct behaviour
>> for all cases yet.
>
> OK. Is there any way of testing the other cases like scatter-gather and
> memcpy?  I verified memcpy in dmatest, but it does not seem to do
> anything with the residue bytes.
>
>>
>> Again, the semantics are that it should return the progress of the transfer
>> for which the allocation function returned the cookie that is passed to this
>
> Maybe my understanding is wrong. For clarification: in
> snd_dmaengine_pcm_pointer() the residue bytes are subtracted from the
> total buffer bytes, not from the period bytes. So how can it report
> the progress of the transfer for the passed cookie, which only covers period bytes?
>
>>
>> function. You have to consider that there might be multiple different
>> descriptors submitted and in the work list, not just the one we want to know
>
> Even though there are multiple descriptors in the work list, only two
> descriptors are in the BUSY state at a time (as per the documentation in the
> driver), and the descriptors' cookie numbers are in increasing order.
> I am not sure how it will be for other cases.
>
Yes.

Tracing the history ... I think we could have done without

04abf5daf7d  dma: pl330: Differentiate between submitted and issued descriptors

    The pl330 dmaengine driver currently does not differentiate between
    submitted and issued descriptors. It won't start transferring a newly
    submitted descriptor until issue_pending() is called, but only if it is
    idle. If it is active and a new descriptor is submitted before it goes
    idle it will happily start the newly submitted descriptor once all
    earlier submitted descriptors have been completed. This is not 100%
    correct with regards to the dmaengine interface semantics. A descriptor
    is not supposed to be started until the next issue_pending() call after
    the descriptor has been submitted.


because the reasoning above seems incorrect considering the following
documentation...

Documentation/crypto/async-tx-api.txt says
  " .... Once a driver-specific threshold is met the driver
automatically issues pending operations.  An application can force this
event by calling async_tx_issue_pending_all(). ...."

And

include/linux/dmaengine.h says
  dma_async_tx_descriptor.tx_submit(): set the prepared descriptor(s)
to be executed by the engine

so theoretically a driver, not starting a transfer until
issue_pending(), is "broken".
At best the patch @04abf5daf7d makes the driver slightly more
complicated and is the reason behind confusion such as in this thread.

-jassi
Lars-Peter Clausen Dec. 4, 2014, 8:15 p.m. UTC | #5
On 12/03/2014 05:47 AM, Padma Venkat wrote:
> Hi Lars,
>
> [snip]
>>>> +
>>>> +	ret = dma_cookie_status(chan, cookie, txstate);
>>>> +	if (ret == DMA_COMPLETE || !txstate)
>>>> +		return ret;
>>>> +
>>>> +	used = txstate->used;
>>>> +
>>>> +	spin_lock_irqsave(&pch->lock, flags);
>>>> +	sar = readl(regs + SA(thrd->id));
>>>> +	dar = readl(regs + DA(thrd->id));
>>>> +
>>>> +	list_for_each_entry(desc, &pch->work_list, node) {
>>>> +		if (desc->status == BUSY) {
>>>> +			current_c = desc->txd.cookie;
>>>> +			if (first) {
>>>> +				first_c = desc->txd.cookie;
>>>> +				first = false;
>>>> +			}
>>>> +
>>>> +			if (first_c < current_c)
>>>> +				residue += desc->px.bytes;
>>>> +			else {
>>>> +				if (desc->rqcfg.src_inc && pl330_src_addr_in_desc(desc, sar)) {
>>>> +					residue += desc->px.bytes;
>>>> +					residue -= sar - desc->px.src_addr;
>>>> +				} else if (desc->rqcfg.dst_inc && pl330_dst_addr_in_desc(desc, dar))
>>>> {
>>>> +					residue += desc->px.bytes;
>>>> +					residue -= dar - desc->px.dst_addr;
>>>> +				}
>>>> +			}
>>>> +		} else if (desc->status == PREP)
>>>> +			residue += desc->px.bytes;
>>>> +
>>>> +		if (desc->txd.cookie == used)
>>>> +			break;
>>>> +	}
>>>> +	spin_unlock_irqrestore(&pch->lock, flags);
>>>> +	dma_set_residue(txstate, residue);
>>>> +	return ret;
>>>>    }
> [snip]
>>>
>>> Any comment on this patch?
>>
>> Well, it doesn't break audio, but I don't think it has the correct behaviour
>> for all cases yet.
>
> OK. Is there any way of testing the other cases like scatter-gather and
> memcpy?  I verified memcpy in dmatest, but it does not seem to do
> anything with the residue bytes.

E.g. I think your current patch fails if more than one transfer has been 
submitted. In that case you'll count the bytes for all of them rather than 
just the one requested.

>
>>
>> Again, the semantics are that it should return the progress of the transfer
>> for which the allocation function returned the cookie that is passed to this
>
> Maybe my understanding is wrong. For clarification: in
> snd_dmaengine_pcm_pointer() the residue bytes are subtracted from the
> total buffer bytes, not from the period bytes. So how can it report
> the progress of the transfer for the passed cookie, which only covers period bytes?

The issue that makes implementing this correctly for the pl330 driver 
complicated is that the driver allocates one cookie per segment/period, but 
the external API works with one cookie per transfer. All those cookies that 
are assigned to the segments/periods that are not the first in a transfer 
are not externally visible.

dmaengine_prep_slave_sg() and friends create a transfer and return a 
descriptor for this transfer with a single cookie. If that cookie is passed 
to dmaengine_tx_status() the function is supposed to report the progress of 
that one transfer that was previously allocated and returned that cookie 
number. residue is the total number of bytes that are still left to be 
processed for that transfer.
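
For the audio case that means one prep call and one externally visible
cookie for the whole ring, e.g. (sketch; chan, buf, buf_len and
period_len are placeholders):

    /* One descriptor/cookie covers every period of the cyclic buffer;
     * any per-period bookkeeping stays internal to the driver. */
    desc = dmaengine_prep_dma_cyclic(chan, buf, buf_len, period_len,
                                     DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
    cookie = dmaengine_submit(desc);
    dma_async_issue_pending(chan);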

I've started reworking the pl330 driver to only have a single cookie per 
transfer, instead of one per period/segment. This makes implementing the 
residue reporting a lot easier. I never finished that work, but I can 
try to see if I can revive it next week.

- Lars
Vinod Koul Dec. 5, 2014, 3:10 p.m. UTC | #6
On Tue, Dec 02, 2014 at 06:25:49PM +0100, Lars-Peter Clausen wrote:
> On 12/02/2014 06:38 AM, Padma Venkat wrote:
> 
> Well, it doesn't break audio, but I don't think it has the correct
> behaviour for all cases yet.
> 
> Again, the semantics are that it should return the progress of the
> transfer for which the allocation function returned the cookie that
> is passed to this function. You have to consider that there might be
> multiple different descriptors submitted and in the work list, not
> just the one we want to know the status of. The big problem with the
> pl330 driver is that the current structure of the driver makes it
> not so easy to implement the residue reporting correctly.

Well, you can use the completed_cookie as a hint. That was the last
transaction which was issued, so calculate the remaining bytes for this
one; for subsequent ones the residue would simply be the size of the
transaction.
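
Something along these lines (a hypothetical sketch of the suggested
scheme, not working driver code; first_busy_cookie and hw_bytes_done()
are made-up placeholders):

    /* Descriptors at or before completed_cookie are done; only the one
     * currently in flight needs its progress read from the hardware. */
    if (dma_async_is_complete(cookie, chan->completed_cookie,
                              chan->cookie) == DMA_COMPLETE)
        residue = 0;
    else if (cookie == first_busy_cookie)  /* the in-flight descriptor */
        residue = desc->px.bytes - hw_bytes_done(desc);
    else
        residue = desc->px.bytes;          /* not started yet: full size */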
Vinod Koul Dec. 5, 2014, 3:15 p.m. UTC | #7
On Wed, Dec 03, 2014 at 01:21:37PM +0530, Jassi Brar wrote:
> On 3 December 2014 at 10:17, Padma Venkat <padma.kvr@gmail.com> wrote:
> > Hi Lars,
> >
> > [snip]
> >>>> +
> >>>> +   ret = dma_cookie_status(chan, cookie, txstate);
> >>>> +   if (ret == DMA_COMPLETE || !txstate)
> >>>> +           return ret;
> >>>> +
> >>>> +   used = txstate->used;
> >>>> +
> >>>> +   spin_lock_irqsave(&pch->lock, flags);
> >>>> +   sar = readl(regs + SA(thrd->id));
> >>>> +   dar = readl(regs + DA(thrd->id));
> >>>> +
> >>>> +   list_for_each_entry(desc, &pch->work_list, node) {
> >>>> +           if (desc->status == BUSY) {
> >>>> +                   current_c = desc->txd.cookie;
> >>>> +                   if (first) {
> >>>> +                           first_c = desc->txd.cookie;
> >>>> +                           first = false;
> >>>> +                   }
> >>>> +
> >>>> +                   if (first_c < current_c)
> >>>> +                           residue += desc->px.bytes;
> >>>> +                   else {
> >>>> +                           if (desc->rqcfg.src_inc && pl330_src_addr_in_desc(desc, sar)) {
> >>>> +                                   residue += desc->px.bytes;
> >>>> +                                   residue -= sar - desc->px.src_addr;
> >>>> +                           } else if (desc->rqcfg.dst_inc && pl330_dst_addr_in_desc(desc, dar))
> >>>> {
> >>>> +                                   residue += desc->px.bytes;
> >>>> +                                   residue -= dar - desc->px.dst_addr;
> >>>> +                           }
> >>>> +                   }
> >>>> +           } else if (desc->status == PREP)
> >>>> +                   residue += desc->px.bytes;
> >>>> +
> >>>> +           if (desc->txd.cookie == used)
> >>>> +                   break;
> >>>> +   }
> >>>> +   spin_unlock_irqrestore(&pch->lock, flags);
> >>>> +   dma_set_residue(txstate, residue);
> >>>> +   return ret;
> >>>>   }
> > [snip]
> >>>
> >>> Any comment on this patch?
> >>
> >> Well, it doesn't break audio, but I don't think it has the correct behaviour
> >> for all cases yet.
> >
> > OK. Is there any way of testing the other cases like scatter-gather and
> > memcpy?  I verified memcpy in dmatest, but it does not seem to do
> > anything with the residue bytes.
> >
> >>
> >> Again, the semantics are that it should return the progress of the transfer
> >> for which the allocation function returned the cookie that is passed to this
> >
> > Maybe my understanding is wrong. For clarification: in
> > snd_dmaengine_pcm_pointer() the residue bytes are subtracted from the
> > total buffer bytes, not from the period bytes. So how can it report
> > the progress of the transfer for the passed cookie, which only covers period bytes?
> >
> >>
> >> function. You have to consider that there might be multiple different
> >> descriptors submitted and in the work list, not just the one we want to know
> >
> > Even though there are multiple descriptors in the work list, only two
> > descriptors are in the BUSY state at a time (as per the documentation in the
> > driver), and the descriptors' cookie numbers are in increasing order.
> > I am not sure how it will be for other cases.
> >
> Yes.
> 
> Tracing the history ... I think we could have done without
> 
> 04abf5daf7d  dma: pl330: Differentiate between submitted and issued descriptors
> 
>     The pl330 dmaengine driver currently does not differentiate between
>     submitted and issued descriptors. It won't start transferring a newly
>     submitted descriptor until issue_pending() is called, but only if it is
>     idle. If it is active and a new descriptor is submitted before it goes
>     idle it will happily start the newly submitted descriptor once all
>     earlier submitted descriptors have been completed. This is not 100%
>     correct with regards to the dmaengine interface semantics. A descriptor
>     is not supposed to be started until the next issue_pending() call after
>     the descriptor has been submitted.
> 
> 
> because the reasoning above seems incorrect considering the following
> documentation...
> 
> Documentation/crypto/async-tx-api.txt says
>   " .... Once a driver-specific threshold is met the driver
> automatically issues pending operations.  An application can force this
> event by calling async_tx_issue_pending_all(). ...."
> 
> And
> 
> include/linux/dmaengine.h says
>   dma_async_tx_descriptor.tx_submit(): set the prepared descriptor(s)
> to be executed by the engine
"to be" here can lead to different conculsion. I will reword this :)

@tx_submit: accept the descriptor and assign ordered cookie and mark the
descriptor pending. To be pushed on .issue_pending() call
Russell King - ARM Linux Dec. 5, 2014, 3:18 p.m. UTC | #8
On Wed, Dec 03, 2014 at 01:21:37PM +0530, Jassi Brar wrote:
> because the reasoning above seems incorrect considering the following
> documentation...
> 
> Documentation/crypto/async-tx-api.txt says
                       ^^^^^^^^^^^^

async-tx-api is not the DMA slave API.

>   " .... Once a driver-specific threshold is met the driver
> automatically issues pending operations.  An application can force this
> event by calling async_tx_issue_pending_all(). ...."
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^

DMA slave users do not call this function.  This documentation is not
relevant for DMA slave.

> include/linux/dmaengine.h says
>   dma_async_tx_descriptor.tx_submit(): set the prepared descriptor(s)
> to be executed by the engine

It doesn't say when.

> so theoretically a driver, not starting a transfer until
> issue_pending(), is "broken".

It isn't.  DMA slave engine implementations have been needing the
issue_pending() call since their dawn, so it's something that they've
always needed.

> At best the patch @04abf5daf7d makes the driver slightly more
> complicated and is the reason behind confusion such as in this thread.

That may be, and yes, it _might_ be worth discussing whether this should
be relaxed or not, but that should be done as a proposal, not by trying to
hide it as a bug fix.  It isn't.
Jassi Brar Dec. 6, 2014, 7:01 a.m. UTC | #9
On 5 December 2014 at 20:48, Russell King - ARM Linux
<linux@arm.linux.org.uk> wrote:
> On Wed, Dec 03, 2014 at 01:21:37PM +0530, Jassi Brar wrote:
>> because the reasoning above seems incorrect considering the following
>> documentation...
>>
>> Documentation/crypto/async-tx-api.txt says
>                        ^^^^^^^^^^^^
>
> async-tx-api is not the DMA slave API.
>
>>   " .... Once a driver-specific threshold is met the driver
>> automatically issues pending operations.  An application can force this
>> event by calling async_tx_issue_pending_all(). ...."
>                    ^^^^^^^^^^^^^^^^^^^^^^^^^^
>
> DMA slave users do not call this function.  This documentation is not
> relevant for DMA slave.
>
The APIs (device_issue_pending, tx_submit, etc.) are the same for Slave and
Async: async_tx_issue_pending_all() for Async and dma_async_issue_pending()
for Slave both boil down to device_issue_pending().

>> include/linux/dmaengine.h says
>>   dma_async_tx_descriptor.tx_submit(): set the prepared descriptor(s)
>> to be executed by the engine
>
> It doesn't say when.
>
:) OK

>> so theoretically a driver, not starting a transfer until
>> issue_pending(), is "broken".
>
> It isn't.  DMA slave engine implementations have been needing the
> issue_pending() call since their dawn, so it's something that they've
> always needed.
>
>> At best the patch @04abf5daf7d makes the driver slightly more
>> complicated and is the reason behind confusion such as in this thread.
>
> That may be, and yes, it _might_ be worth discussing whether this should
> be relaxed or not, but that should be done as a proposal, not by trying to
> hide it as a bug fix.  It isn't.
>
It does, though, create an "awkward situation" when a channel is
active while new requests are submitted - why would the channel want
to stop after the current transfer and await a fresh issue_pending()? It's
not that the request can be modified after submission.

And it isn't that tx_submit() is meant for 'sleepable' preparation for
the transfer and we need another call to be issued from atomic
context. From what I see most drivers don't need to sleep in
tx_submit(). In fact, from a quick look most clients seem to suffer
from the ritual, i.e. there's nothing between tx_submit() and
issue_pending() calls. And when there is indeed some code, it seems
it can be moved to just before tx_submit().

Last, and apparently the least of all, we can never enforce the same
behavior of the API on Async DMA and have uniform behavior over the
API.

So, if all tx_submit() does is put the request in the controller driver's
internal queue, and the client can't touch the request after
tx_submit(), why not merge tx_submit() and issue_pending()?
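
In client terms the proposal would collapse today's ritual into a single
call (sketch; the merged behaviour is hypothetical, not the current API):

    /* Today: two calls, usually with nothing useful between them. */
    cookie = dmaengine_submit(desc);
    dma_async_issue_pending(chan);

    /* Proposed: submit also starts the transfer, no separate kick. */
    cookie = dmaengine_submit(desc);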

Thanks.
Vinod Koul Dec. 8, 2014, 1:07 p.m. UTC | #10
On Sat, Dec 06, 2014 at 12:31:01PM +0530, Jassi Brar wrote:
> It does, though, create an "awkward situation" when a channel is
> active while new requests are submitted - why would the channel want
> to stop after the current transfer and await a fresh issue_pending()? It's
> not that the request can be modified after submission.
> 
> And it isn't that tx_submit() is meant for 'sleepable' preparation for
> the transfer and we need another call to be issued from atomic
> context. From what I see most drivers don't need to sleep in
> tx_submit(). In fact, from a quick look most clients seem to suffer
> from the ritual, i.e. there's nothing between tx_submit() and
> issue_pending() calls. And when there is indeed some code, it seems
> it can be moved to just before tx_submit().
> 
> Last, and apparently the least of all, we can never enforce the same
> behavior of the API on Async DMA and have uniform behavior over the
> API.
> 
> So, if all tx_submit() does is put the request in the controller driver's
> internal queue, and the client can't touch the request after
> tx_submit(), why not merge tx_submit() and issue_pending()?
You are thinking from an API point of view and not of what can be done with
this API. Today this API allows you to collate all pending txns and start
them together: the dmaengine driver can collate and submit them as a batch
to the hardware and not waste time in taking an irq and then setting up the
next one. Sadly none of the drivers use this feature...

I actually like the split model: you can also prepare txns ahead of time and
submit them when needed. If you don't like this, then please do as most
implementations do today and call prepare and submit in series. Flexibility
in the API is a better option IMO

Thanks
Russell King - ARM Linux Dec. 8, 2014, 2:23 p.m. UTC | #11
On Mon, Dec 08, 2014 at 06:37:27PM +0530, Vinod Koul wrote:
> I actually like the split model: you can also prepare txns ahead of time and
> submit them when needed.

Actually, you can't - that's not permitted.  I have email(s) from Dan
explicitly stating that it is permitted for a driver to take a spinlock
in their prepare callback, and release it when the descriptor is
submitted.  Several DMA engine drivers (particularly those for
async_tx) do exactly that.

The reason that submit is separate from prepare is to allow DMA engine
users to set a callback - if it weren't for that, there wouldn't be a
submit step, prepare would have done everything.
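
That window looks like this in client code (sketch; my_done_cb and
my_ctx are placeholders):

    desc = dmaengine_prep_slave_sg(chan, sgl, sg_len, DMA_DEV_TO_MEM,
                                   DMA_PREP_INTERRUPT);
    /* The gap between prep and submit exists so the client can attach
     * its completion callback to the descriptor before it is queued. */
    desc->callback = my_done_cb;
    desc->callback_param = my_ctx;
    cookie = dmaengine_submit(desc);
    dma_async_issue_pending(chan);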
Vinod Koul Dec. 9, 2014, 6:10 a.m. UTC | #12
On Mon, Dec 08, 2014 at 02:23:21PM +0000, Russell King - ARM Linux wrote:
> On Mon, Dec 08, 2014 at 06:37:27PM +0530, Vinod Koul wrote:
> > I actually like the split model: you can also prepare txns ahead of time and
> > submit them when needed.
> 
> Actually, you can't - that's not permitted.  I have email(s) from Dan
> explicitly stating that it is permitted for a driver to take a spinlock
> in their prepare callback, and release it when the descriptor is
> submitted.  Several DMA engine drivers (particularly those for
> async_tx) do exactly that.
> 
> The reason that submit is separate from prepare is to allow DMA engine
> users to set a callback - if it weren't for that, there wouldn't be a
> submit step, prepare would have done everything.
Yes, that's right.

Do you mind pointing me to the thread where Dan replied? I would like to add
these bits and anything else missing to the Documentation.

Thanks
Jassi Brar Dec. 9, 2014, 3:18 p.m. UTC | #13
On 8 December 2014 at 18:37, Vinod Koul <vinod.koul@intel.com> wrote:
> On Sat, Dec 06, 2014 at 12:31:01PM +0530, Jassi Brar wrote:
>> It does, though, create an "awkward situation" when a channel is
>> active while new requests are submitted - why would the channel want
>> to stop after the current transfer and await a fresh issue_pending()? It's
>> not that the request can be modified after submission.
>>
>> And it isn't that tx_submit() is meant for 'sleepable' preparation for
>> the transfer and we need another call to be issued from atomic
>> context. From what I see most drivers don't need to sleep in
>> tx_submit(). In fact, from a quick look most clients seem to suffer
>> from the ritual, i.e. there's nothing between tx_submit() and
>> issue_pending() calls. And when there is indeed some code, it seems
>> it can be moved to just before tx_submit().
>>
>> Last, and apparently the least of all, we can never enforce the same
>> behavior of the API on Async DMA and have uniform behavior over the
>> API.
>>
>> So, if all tx_submit() does is put the request in the controller driver's
>> internal queue, and the client can't touch the request after
>> tx_submit(), why not merge tx_submit() and issue_pending()?
> You are thinking from an API point of view and not of what can be done with
> this API.
>
"awkward situation" and "ritual suffering" above, are two practical
issues that we see.

> Today this API allows you to collate all pending txns and start
> them together: the dmaengine driver can collate and submit them as a batch
> to the hardware and not waste time in taking an irq and then setting up the
> next one. Sadly none of the drivers use this feature...
>
So we at least agree that the "feature" is unused so far.
And I doubt it would ever make sense to batch transfers in slave
DMA (unlike offloading on async DMA) because the user has to also
set up the master for each queued request, mostly if not always.

> I actually like the split model: you can also prepare txns ahead of time and
> submit them when needed. If you don't like this, then please do as most
> implementations do today and call prepare and submit in series. Flexibility
> in the API is a better option IMO
>
As Russell pointed out, that ain't the case either.
So we are yet to figure out the benefits of having an explicit
issue_pending() after tx_submit().

-Jassi
Vinod Koul Dec. 11, 2014, 4:47 a.m. UTC | #14
On Tue, Dec 09, 2014 at 08:48:04PM +0530, Jassi Brar wrote:
> As Russell pointed out, that ain't the case either.
> So we are yet to figure out the benefits of having an explicit
> issue_pending() after tx_submit().
callback?
Jassi Brar Dec. 11, 2014, 6:12 a.m. UTC | #15
On 11 December 2014 at 10:17, Vinod Koul <vinod.koul@intel.com> wrote:
> On Tue, Dec 09, 2014 at 08:48:04PM +0530, Jassi Brar wrote:
>> As Russell pointed out, that ain't the case either.
>> So we are yet to figure out the benefits of having an explicit
>> issue_pending() after tx_submit().
> callback?
>
The callback is set after prep() and before tx_submit(), but here we
are talking about what happens after tx_submit().
Jassi Brar March 12, 2015, 8:47 a.m. UTC | #16
Hi Vinod, Hi Russell,

On 11 December 2014 at 11:42, Jassi Brar <jaswinder.singh@linaro.org> wrote:
> On 11 December 2014 at 10:17, Vinod Koul <vinod.koul@intel.com> wrote:
>> On Tue, Dec 09, 2014 at 08:48:04PM +0530, Jassi Brar wrote:
>>> As Russell pointed out, that ain't the case either.
>>> So we are yet to figure out the benefits of having an explicit
>>> issue_pending() after tx_submit().
>> callback?
>>
> The callback is set after prep() and before tx_submit(), but here we
> are talking about what happens after tx_submit().

Perhaps the idea dates back to the async-only days, when client drivers
would prepare and queue descriptors in the controller driver rather
than having to manage the dependency queues themselves (?).

Today ~95% of clients are slave and I have yet to find one that really
can't work with submit and issue_pending tied together. Not to forget
that 100% of the controller drivers have to manage 'submitted' and
'active' queues -- only to have arguably negative side-effects.

If we agree that clubbing submit and issue_pending is the right thing
to do, I can start converting the ~90 client drivers. Please let me
know either way.

Cheers!

Patch

diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index b7493d2..db880ae 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2182,11 +2182,74 @@  static void pl330_free_chan_resources(struct dma_chan *chan)
 	pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
 }
 
+static inline int
+pl330_src_addr_in_desc(struct dma_pl330_desc *desc, unsigned int sar)
+{
+	return ((desc->px.src_addr <= sar) &&
+		(sar <= (desc->px.src_addr + desc->px.bytes)));
+}
+
+static inline int
+pl330_dst_addr_in_desc(struct dma_pl330_desc *desc, unsigned int dar)
+{
+	return ((desc->px.dst_addr <= dar) &&
+		(dar <= (desc->px.dst_addr + desc->px.bytes)));
+}
+
 static enum dma_status
 pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 		 struct dma_tx_state *txstate)
 {
-	return dma_cookie_status(chan, cookie, txstate);
+	dma_addr_t sar, dar;
+	struct dma_pl330_chan *pch = to_pchan(chan);
+	void __iomem *regs = pch->dmac->base;
+	struct pl330_thread *thrd = pch->thread;
+	struct dma_pl330_desc *desc;
+	unsigned int residue = 0;
+	unsigned long flags;
+	bool first = true;
+	dma_cookie_t first_c, current_c;
+	dma_cookie_t used;
+	enum dma_status ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE || !txstate)
+		return ret;
+
+	used = txstate->used;
+
+	spin_lock_irqsave(&pch->lock, flags);
+	sar = readl(regs + SA(thrd->id));
+	dar = readl(regs + DA(thrd->id));
+
+	list_for_each_entry(desc, &pch->work_list, node) {
+		if (desc->status == BUSY) {
+			current_c = desc->txd.cookie;
+			if (first) {
+				first_c = desc->txd.cookie;
+				first = false;
+			}
+
+			if (first_c < current_c)
+				residue += desc->px.bytes;
+			else {
+				if (desc->rqcfg.src_inc && pl330_src_addr_in_desc(desc, sar)) {
+					residue += desc->px.bytes;
+					residue -= sar - desc->px.src_addr;
+				} else if (desc->rqcfg.dst_inc && pl330_dst_addr_in_desc(desc, dar)) {
+					residue += desc->px.bytes;
+					residue -= dar - desc->px.dst_addr;
+				}
+			}
+		} else if (desc->status == PREP)
+			residue += desc->px.bytes;
+
+		if (desc->txd.cookie == used)
+			break;
+	}
+	spin_unlock_irqrestore(&pch->lock, flags);
+	dma_set_residue(txstate, residue);
+	return ret;
 }
 
 static void pl330_issue_pending(struct dma_chan *chan)
@@ -2631,7 +2694,7 @@  static int pl330_dma_device_slave_caps(struct dma_chan *dchan,
 	caps->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
 	caps->cmd_pause = false;
 	caps->cmd_terminate = true;
-	caps->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+	caps->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
 
 	return 0;
 }