
[1/2] dmaengine: ti: k3-udma: Respond TX done if DMA_PREP_INTERRUPT is not requested

Message ID: 20220822091531.27827-2-vaishnav.a@ti.com (mailing list archive)
State: Changes Requested
Series: spi: spi-omap2-mcspi: Use EOW interrupt for transfer completion

Commit Message

Vaishnav Achath Aug. 22, 2022, 9:15 a.m. UTC
When the DMA consumer driver does not expect a callback for TX done,
there is no need to perform the channel RT byte counter calculations
to estimate completion; instead, completion can be reported on the first
check. This assumes that a consumer which did not request DMA_PREP_INTERRUPT
has its own mechanism for detecting TX completion; for example, the MCSPI
EOW interrupt can be used as the TX completion signal for a SPI transaction.

Signed-off-by: Vaishnav Achath <vaishnav.a@ti.com>
---
 drivers/dma/ti/k3-udma.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
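
For context, a minimal consumer-side sketch (illustrative only, not part of
this patch; the helper name and error handling are made up) of how a client
with its own completion signal would submit a TX transfer without
DMA_PREP_INTERRUPT, so the fast path described in the commit message applies:

#include <linux/dmaengine.h>
#include <linux/scatterlist.h>

/* Illustrative helper (not from this series): submit a TX scatterlist
 * without requesting a dmaengine completion callback. chan/sgl/nents
 * are assumed to be prepared by the caller; error handling abbreviated.
 */
static int example_submit_tx_no_callback(struct dma_chan *chan,
					 struct scatterlist *sgl,
					 unsigned int nents)
{
	struct dma_async_tx_descriptor *desc;
	dma_cookie_t cookie;

	/* flags = 0: DMA_PREP_INTERRUPT deliberately not set */
	desc = dmaengine_prep_slave_sg(chan, sgl, nents, DMA_MEM_TO_DEV, 0);
	if (!desc)
		return -EINVAL;

	cookie = dmaengine_submit(desc);
	if (dma_submit_error(cookie))
		return -EINVAL;

	dma_async_issue_pending(chan);

	/* Completion is detected via the peripheral's own interrupt
	 * (e.g. MCSPI EOW), not via a dmaengine callback.
	 */
	return 0;
}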

Comments

Péter Ujfalusi Aug. 22, 2022, 1:12 p.m. UTC | #1
On 22/08/2022 12:15, Vaishnav Achath wrote:
> When the DMA consumer driver does not expect a callback for TX done,
> there is no need to perform the channel RT byte counter calculations
> to estimate completion; instead, completion can be reported on the first
> check. This assumes that a consumer which did not request DMA_PREP_INTERRUPT
> has its own mechanism for detecting TX completion; for example, the MCSPI
> EOW interrupt can be used as the TX completion signal for a SPI transaction.

The check is in place to make sure that we don't leave stale data in the
DMA fabric.
If you drop the check, it becomes possible for some TX data to be lost.
It could be one out of 10K or 100K transfers, but if that happens it is
not going to be easy to track down.
Let's say we got the packet back, but PDMA still has data to send and
the IP stops transmitting (externally clocked bus, some delay, etc.).
Is it going to be OK to disable the channel?
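
For reference, the check being defended here is the byte-counter comparison in
udma_is_desc_really_done(): the channel's own byte count can run ahead of the
peer (PDMA) byte count while data is still draining towards the peripheral.
A condensed paraphrase of the pre-patch logic (abbreviated from the driver;
details beyond the hunk shown in the diff should be treated as approximate):

/* Paraphrased sketch of the pre-patch completion check (abbreviated) */
static bool udma_is_desc_really_done(struct udma_chan *uc, struct udma_desc *d)
{
	u32 peer_bcnt, bcnt;

	/* Only TX towards PDMA is affected */
	if (uc->config.ep_type == PSIL_EP_NATIVE ||
	    uc->config.dir != DMA_MEM_TO_DEV)
		return true;

	peer_bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG);
	bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG);

	/* Peer (PDMA) lagging behind: data is still in the DMA fabric */
	if (peer_bcnt < bcnt)
		return false;	/* not done yet, keep polling/draining */

	return true;
}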

> 
> Signed-off-by: Vaishnav Achath <vaishnav.a@ti.com>
> ---
>  drivers/dma/ti/k3-udma.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
> index 39b330ada200..03d579068453 100644
> --- a/drivers/dma/ti/k3-udma.c
> +++ b/drivers/dma/ti/k3-udma.c
> @@ -263,6 +263,7 @@ struct udma_chan_config {
>  	enum udma_tp_level channel_tpl; /* Channel Throughput Level */
>  
>  	u32 tr_trigger_type;
> +	unsigned long tx_flags;
>  
>  	/* PKDMA mapped channel */
>  	int mapped_channel_id;
> @@ -1057,7 +1058,7 @@ static bool udma_is_desc_really_done(struct udma_chan *uc, struct udma_desc *d)
>  
>  	/* Only TX towards PDMA is affected */
>  	if (uc->config.ep_type == PSIL_EP_NATIVE ||
> -	    uc->config.dir != DMA_MEM_TO_DEV)
> +	    uc->config.dir != DMA_MEM_TO_DEV || !(uc->config.tx_flags & DMA_PREP_INTERRUPT))
>  		return true;
>  
>  	peer_bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG);
> @@ -3418,6 +3419,8 @@ udma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
>  	if (!burst)
>  		burst = 1;
>  
> +	uc->config.tx_flags = tx_flags;
> +
>  	if (uc->config.pkt_mode)
>  		d = udma_prep_slave_sg_pkt(uc, sgl, sglen, dir, tx_flags,
>  					   context);
Vaishnav Achath Aug. 23, 2022, 6:57 a.m. UTC | #2
Hi Peter,

On 22/08/22 18:42, Péter Ujfalusi wrote:
> 
> 
> On 22/08/2022 12:15, Vaishnav Achath wrote:
>> When the DMA consumer driver does not expect a callback for TX done,
>> there is no need to perform the channel RT byte counter calculations
>> to estimate completion; instead, completion can be reported on the first
>> check. This assumes that a consumer which did not request DMA_PREP_INTERRUPT
>> has its own mechanism for detecting TX completion; for example, the MCSPI
>> EOW interrupt can be used as the TX completion signal for a SPI transaction.
> 
> The check is in place to make sure that we don't leave stale data in the
> DMA fabric.
> If you drop the check, it becomes possible for some TX data to be lost.
> It could be one out of 10K or 100K transfers, but if that happens it is
> not going to be easy to track down.
> Let's say we got the packet back, but PDMA still has data to send and
> the IP stops transmitting (externally clocked bus, some delay, etc.).
> Is it going to be OK to disable the channel?
> 
Thanks for the feedback. Yes, the check is necessary in most cases,
but there needs to be a way to disable it for consumers which can
identify the end of a transaction using some other internal mechanism
or interrupt.

For example, the MCSPI controller raises an End of Word (EOW) interrupt when
the programmed number of bytes has been clocked out; in this case the EOW
interrupt being raised guarantees that there is no stale data in the DMA
fabric. Using the EOW interrupt to identify the completion of a transaction
significantly improves transaction speed, since we no longer have to wait for
the slower DMA TX completion calculation.

This commit bypasses the check only when the consumer did not request a
completion callback, i.e. did not pass the DMA_PREP_INTERRUPT flag; in all
other cases the check is not bypassed.
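
One way to picture the consumer side described above (a hypothetical pattern,
not code from spi-omap2-mcspi; all names below are illustrative): the
peripheral's own end-of-transfer interrupt completes a wait object, so nothing
ever waits on a dmaengine callback.

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>

static DECLARE_COMPLETION(example_tx_done);

/* Hypothetical handler for the peripheral's "end of transfer" IRQ
 * (e.g. MCSPI EOW): the last word has been clocked out on the wire,
 * so no stale data can remain in the DMA fabric.
 */
static irqreturn_t example_eow_irq(int irq, void *data)
{
	complete(&example_tx_done);
	return IRQ_HANDLED;
}

/* Wait on the peripheral IRQ instead of a dmaengine TX callback */
static int example_wait_for_tx(void)
{
	if (!wait_for_completion_timeout(&example_tx_done,
					 msecs_to_jiffies(100)))
		return -ETIMEDOUT;
	return 0;
}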

>>
>> Signed-off-by: Vaishnav Achath <vaishnav.a@ti.com>
>> ---
>>  drivers/dma/ti/k3-udma.c | 5 ++++-
>>  1 file changed, 4 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
>> index 39b330ada200..03d579068453 100644
>> --- a/drivers/dma/ti/k3-udma.c
>> +++ b/drivers/dma/ti/k3-udma.c
>> @@ -263,6 +263,7 @@ struct udma_chan_config {
>>  	enum udma_tp_level channel_tpl; /* Channel Throughput Level */
>>  
>>  	u32 tr_trigger_type;
>> +	unsigned long tx_flags;
>>  
>>  	/* PKDMA mapped channel */
>>  	int mapped_channel_id;
>> @@ -1057,7 +1058,7 @@ static bool udma_is_desc_really_done(struct udma_chan *uc, struct udma_desc *d)
>>  
>>  	/* Only TX towards PDMA is affected */
>>  	if (uc->config.ep_type == PSIL_EP_NATIVE ||
>> -	    uc->config.dir != DMA_MEM_TO_DEV)
>> +	    uc->config.dir != DMA_MEM_TO_DEV || !(uc->config.tx_flags & DMA_PREP_INTERRUPT))
>>  		return true;
>>  
>>  	peer_bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG);
>> @@ -3418,6 +3419,8 @@ udma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
>>  	if (!burst)
>>  		burst = 1;
>>  
>> +	uc->config.tx_flags = tx_flags;
>> +
>>  	if (uc->config.pkt_mode)
>>  		d = udma_prep_slave_sg_pkt(uc, sgl, sglen, dir, tx_flags,
>>  					   context);
>
Péter Ujfalusi Sept. 2, 2022, 2:20 p.m. UTC | #3
Hi Achath,

On 23/08/2022 09:57, Vaishnav Achath wrote:
> Hi Peter,
> 
> On 22/08/22 18:42, Péter Ujfalusi wrote:
>>
>>
>> On 22/08/2022 12:15, Vaishnav Achath wrote:
>>> When the DMA consumer driver does not expect a callback for TX done,
>>> there is no need to perform the channel RT byte counter calculations
>>> to estimate completion; instead, completion can be reported on the first
>>> check. This assumes that a consumer which did not request DMA_PREP_INTERRUPT
>>> has its own mechanism for detecting TX completion; for example, the MCSPI
>>> EOW interrupt can be used as the TX completion signal for a SPI transaction.
>>
>> The check is in place to make sure that we don't leave stale data in the
>> DMA fabric.
>> If you drop the check, it becomes possible for some TX data to be lost.
>> It could be one out of 10K or 100K transfers, but if that happens it is
>> not going to be easy to track down.
>> Let's say we got the packet back, but PDMA still has data to send and
>> the IP stops transmitting (externally clocked bus, some delay, etc.).
>> Is it going to be OK to disable the channel?
>>
> Thanks for the feedback. Yes, the check is necessary in most cases,
> but there needs to be a way to disable it for consumers which can
> identify the end of a transaction using some other internal mechanism
> or interrupt.
> 
> For example, the MCSPI controller raises an End of Word (EOW) interrupt when
> the programmed number of bytes has been clocked out; in this case the EOW
> interrupt being raised guarantees that there is no stale data in the DMA
> fabric. Using the EOW interrupt to identify the completion of a transaction
> significantly improves transaction speed, since we no longer have to wait for
> the slower DMA TX completion calculation.
> 
> This commit bypasses the check only when the consumer did not request a
> completion callback, i.e. did not pass the DMA_PREP_INTERRUPT flag; in all
> other cases the check is not bypassed.

Let me think about it over the weekend... Do you have performance numbers
for this change?

If we make sure that this only affects non-cyclic transfers, with an
in-code comment to explain the expectations from the user, I think this
can be safe.
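
A sketch of what such a guard could look like (illustrative only, not the
posted v2 of this patch; the d->cyclic flag name is assumed from the
descriptor bookkeeping in this driver):

	/*
	 * Sketch: only skip the byte-counter check for non-cyclic
	 * MEM_TO_DEV descriptors whose consumer did not ask for a
	 * completion callback and therefore tracks end-of-transfer
	 * itself (e.g. via the MCSPI EOW interrupt).
	 */
	if (uc->config.ep_type == PSIL_EP_NATIVE ||
	    uc->config.dir != DMA_MEM_TO_DEV ||
	    (!d->cyclic && !(uc->config.tx_flags & DMA_PREP_INTERRUPT)))
		return true;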

> 
>>>
>>> Signed-off-by: Vaishnav Achath <vaishnav.a@ti.com>
>>> ---
>>>   drivers/dma/ti/k3-udma.c | 5 ++++-
>>>   1 file changed, 4 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
>>> index 39b330ada200..03d579068453 100644
>>> --- a/drivers/dma/ti/k3-udma.c
>>> +++ b/drivers/dma/ti/k3-udma.c
>>> @@ -263,6 +263,7 @@ struct udma_chan_config {
>>>   	enum udma_tp_level channel_tpl; /* Channel Throughput Level */
>>>   
>>>   	u32 tr_trigger_type;
>>> +	unsigned long tx_flags;
>>>   
>>>   	/* PKDMA mapped channel */
>>>   	int mapped_channel_id;
>>> @@ -1057,7 +1058,7 @@ static bool udma_is_desc_really_done(struct udma_chan *uc, struct udma_desc *d)
>>>   
>>>   	/* Only TX towards PDMA is affected */
>>>   	if (uc->config.ep_type == PSIL_EP_NATIVE ||
>>> -	    uc->config.dir != DMA_MEM_TO_DEV)
>>> +	    uc->config.dir != DMA_MEM_TO_DEV || !(uc->config.tx_flags & DMA_PREP_INTERRUPT))
>>>   		return true;
>>>   
>>>   	peer_bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG);
>>> @@ -3418,6 +3419,8 @@ udma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
>>>   	if (!burst)
>>>   		burst = 1;
>>>   
>>> +	uc->config.tx_flags = tx_flags;
>>> +
>>>   	if (uc->config.pkt_mode)
>>>   		d = udma_prep_slave_sg_pkt(uc, sgl, sglen, dir, tx_flags,
>>>   					   context);
>>
>
Vaishnav Achath Sept. 5, 2022, 3:02 a.m. UTC | #4
Hi Peter,

On 02/09/22 19:50, Péter Ujfalusi wrote:
> Hi Achath,
> 
> On 23/08/2022 09:57, Vaishnav Achath wrote:
>> Hi Peter,
>>
>> On 22/08/22 18:42, Péter Ujfalusi wrote:
>>>
>>>
>>> On 22/08/2022 12:15, Vaishnav Achath wrote:
>>>> When the DMA consumer driver does not expect a callback for TX done,
>>>> there is no need to perform the channel RT byte counter calculations
>>>> to estimate completion; instead, completion can be reported on the first
>>>> check. This assumes that a consumer which did not request DMA_PREP_INTERRUPT
>>>> has its own mechanism for detecting TX completion; for example, the MCSPI
>>>> EOW interrupt can be used as the TX completion signal for a SPI transaction.
>>>
>>> The check is in place to make sure that we don't leave stale data in the
>>> DMA fabric.
>>> If you drop the check, it becomes possible for some TX data to be lost.
>>> It could be one out of 10K or 100K transfers, but if that happens it is
>>> not going to be easy to track down.
>>> Let's say we got the packet back, but PDMA still has data to send and
>>> the IP stops transmitting (externally clocked bus, some delay, etc.).
>>> Is it going to be OK to disable the channel?
>>>
>> Thanks for the feedback. Yes, the check is necessary in most cases,
>> but there needs to be a way to disable it for consumers which can
>> identify the end of a transaction using some other internal mechanism
>> or interrupt.
>>
>> For example, the MCSPI controller raises an End of Word (EOW) interrupt when
>> the programmed number of bytes has been clocked out; in this case the EOW
>> interrupt being raised guarantees that there is no stale data in the DMA
>> fabric. Using the EOW interrupt to identify the completion of a transaction
>> significantly improves transaction speed, since we no longer have to wait for
>> the slower DMA TX completion calculation.
>>
>> This commit bypasses the check only when the consumer did not request a
>> completion callback, i.e. did not pass the DMA_PREP_INTERRUPT flag; in all
>> other cases the check is not bypassed.
> 
> Let me think about it over the weekend... Do you have performance numbers for this
> change?
> 
Thank you. Yes, we tested mainly the SPI cases (master and slave mode); there
we saw a peak delay of 400 ms for transaction completion, and this varied with
CPU load. After applying the patch so that we no longer wait for the DMA TX
completion calculation and instead use the EOW interrupt, the peak latency
reduced to 2 ms.
> If we make sure that this only affects non-cyclic transfers, with an in-code
> comment to explain the expectations from the user, I think this can be safe.
Sure, I will add this in the next revision.
>>
>>>>
>>>> Signed-off-by: Vaishnav Achath <vaishnav.a@ti.com>
>>>> ---
>>>>   drivers/dma/ti/k3-udma.c | 5 ++++-
>>>>   1 file changed, 4 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
>>>> index 39b330ada200..03d579068453 100644
>>>> --- a/drivers/dma/ti/k3-udma.c
>>>> +++ b/drivers/dma/ti/k3-udma.c
>>>> @@ -263,6 +263,7 @@ struct udma_chan_config {
>>>>  	enum udma_tp_level channel_tpl; /* Channel Throughput Level */
>>>>  
>>>>  	u32 tr_trigger_type;
>>>> +	unsigned long tx_flags;
>>>>  
>>>>  	/* PKDMA mapped channel */
>>>>  	int mapped_channel_id;
>>>> @@ -1057,7 +1058,7 @@ static bool udma_is_desc_really_done(struct udma_chan *uc, struct udma_desc *d)
>>>>  
>>>>  	/* Only TX towards PDMA is affected */
>>>>  	if (uc->config.ep_type == PSIL_EP_NATIVE ||
>>>> -	    uc->config.dir != DMA_MEM_TO_DEV)
>>>> +	    uc->config.dir != DMA_MEM_TO_DEV || !(uc->config.tx_flags & DMA_PREP_INTERRUPT))
>>>>  		return true;
>>>>  
>>>>  	peer_bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG);
>>>> @@ -3418,6 +3419,8 @@ udma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
>>>>  	if (!burst)
>>>>  		burst = 1;
>>>>  
>>>> +	uc->config.tx_flags = tx_flags;
>>>> +
>>>>  	if (uc->config.pkt_mode)
>>>>  		d = udma_prep_slave_sg_pkt(uc, sgl, sglen, dir, tx_flags,
>>>>  					   context);
>>>
>>
>
Péter Ujfalusi Sept. 10, 2022, 6:57 p.m. UTC | #5
On 05/09/2022 06:02, Vaishnav Achath wrote:
>> Let me think about it over the weekend... Do you have performance numbers for this
>> change?
>>
> Thank you. Yes, we tested mainly the SPI cases (master and slave mode); there
> we saw a peak delay of 400 ms for transaction completion, and this varied with
> CPU load. After applying the patch so that we no longer wait for the DMA TX
> completion calculation and instead use the EOW interrupt, the peak latency
> reduced to 2 ms.

Thank you for the details.

>> If we make sure that this only affects non-cyclic transfers, with an in-code
>> comment to explain the expectations from the user, I think this can be safe.
> Sure, I will add this in the next revision.

You can add my Acked-by when you send the next version:
Acked-by: Peter Ujfalusi <peter.ujfalusi@gmail.com>

>>>
>>>>>
>>>>> Signed-off-by: Vaishnav Achath <vaishnav.a@ti.com>
>>>>> ---
>>>>>    drivers/dma/ti/k3-udma.c | 5 ++++-
>>>>>    1 file changed, 4 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
>>>>> index 39b330ada200..03d579068453 100644
>>>>> --- a/drivers/dma/ti/k3-udma.c
>>>>> +++ b/drivers/dma/ti/k3-udma.c
>>>>> @@ -263,6 +263,7 @@ struct udma_chan_config {
>>>>>  	enum udma_tp_level channel_tpl; /* Channel Throughput Level */
>>>>>  
>>>>>  	u32 tr_trigger_type;
>>>>> +	unsigned long tx_flags;
>>>>>  
>>>>>  	/* PKDMA mapped channel */
>>>>>  	int mapped_channel_id;
>>>>> @@ -1057,7 +1058,7 @@ static bool udma_is_desc_really_done(struct udma_chan *uc, struct udma_desc *d)
>>>>>  
>>>>>  	/* Only TX towards PDMA is affected */
>>>>>  	if (uc->config.ep_type == PSIL_EP_NATIVE ||
>>>>> -	    uc->config.dir != DMA_MEM_TO_DEV)
>>>>> +	    uc->config.dir != DMA_MEM_TO_DEV || !(uc->config.tx_flags & DMA_PREP_INTERRUPT))
>>>>>  		return true;
>>>>>  
>>>>>  	peer_bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG);
>>>>> @@ -3418,6 +3419,8 @@ udma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
>>>>>  	if (!burst)
>>>>>  		burst = 1;
>>>>>  
>>>>> +	uc->config.tx_flags = tx_flags;
>>>>> +
>>>>>  	if (uc->config.pkt_mode)
>>>>>  		d = udma_prep_slave_sg_pkt(uc, sgl, sglen, dir, tx_flags,
>>>>>  					   context);
>>>>
>>>
>>
>

Patch

diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
index 39b330ada200..03d579068453 100644
--- a/drivers/dma/ti/k3-udma.c
+++ b/drivers/dma/ti/k3-udma.c
@@ -263,6 +263,7 @@  struct udma_chan_config {
 	enum udma_tp_level channel_tpl; /* Channel Throughput Level */
 
 	u32 tr_trigger_type;
+	unsigned long tx_flags;
 
 	/* PKDMA mapped channel */
 	int mapped_channel_id;
@@ -1057,7 +1058,7 @@  static bool udma_is_desc_really_done(struct udma_chan *uc, struct udma_desc *d)
 
 	/* Only TX towards PDMA is affected */
 	if (uc->config.ep_type == PSIL_EP_NATIVE ||
-	    uc->config.dir != DMA_MEM_TO_DEV)
+	    uc->config.dir != DMA_MEM_TO_DEV || !(uc->config.tx_flags & DMA_PREP_INTERRUPT))
 		return true;
 
 	peer_bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG);
@@ -3418,6 +3419,8 @@  udma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	if (!burst)
 		burst = 1;
 
+	uc->config.tx_flags = tx_flags;
+
 	if (uc->config.pkt_mode)
 		d = udma_prep_slave_sg_pkt(uc, sgl, sglen, dir, tx_flags,
 					   context);