diff mbox

[1/5] spi/mxs: Fix extra CS pulses and read mode in multi-transfer messages

Message ID 1364570381-17605-1-git-send-email-tpiepho@gmail.com (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Trent Piepho March 29, 2013, 3:19 p.m. UTC
There are two bits which control the CS line in the CTRL0 register:
LOCK_CS and IGNORE_CRC.  The latter would be better named DEASSERT_CS
in SPI mode.

LOCK_CS keeps CS asserted though the entire transfer.  This should
always be set.  The DMA code will always set it, explicitly on the
first segment of the first transfer, and then implicitly on all the
rest by never clearing the bit from the value read from the ctrl0
register.

The only reason to not set LOCK_CS would be to attempt an altered
protocol where CS pulses between each word.  Though don't get your
hopes up, as the hardware doesn't appear to do this in any sane
manner.

Setting DEASSERT_CS causes CS to be de-asserted at the end of the
transfer.  It would normally be set on the final segment of the final
transfer.  The DMA code explicitly sets it in this case, but because
it never clears the bit from the ctrl0 register is will remain set for
all transfers in subsequent messages.  This results in a CS pulse
between transfers.

There is a similar problem with the read mode bit never being cleared
in DMA mode.

The spi transfer "cs_change" flag is ignored by the driver.

The driver passes a "first" and "last" flag to the transfer functions
for a message, so they can know how to set these bits.  It does this
by passing them as pointers.  There is no reason to do this, as the
flags are not modified in the transfer function.  And since LOCK_CS
can be set and left that way, there is no need for a "first" flag at
all.

This patch fixes DEASSERT_CS and READ being left on in DMA mode,
eliminates the flags being passed as separate pointers, and adds
support for the cs_change flag.

Note that while using the cs_change flag to leave CS asserted after
the final transfer works, getting the CS to automatically turn off
when a different slave is addressed might not work.

Signed-off-by: Trent Piepho <tpiepho@gmail.com>
Cc: Marek Vasut <marex@denx.de>
Cc: Fabio Estevam <fabio.estevam@freescale.com>
Cc: Shawn Guo <shawn.guo@linaro.org>
---
 drivers/spi/spi-mxs.c |   86 +++++++++++++++++++++----------------------------
 1 file changed, 36 insertions(+), 50 deletions(-)

Comments

Marek Vasut April 1, 2013, 11:11 p.m. UTC | #1
Dear Trent Piepho,

> There are two bits which control the CS line in the CTRL0 register:
> LOCK_CS and IGNORE_CRC.  The latter would be better named DEASSERT_CS
> in SPI mode.
> 
> LOCK_CS keeps CS asserted though the entire transfer.  This should
> always be set.  The DMA code will always set it, explicitly on the
> first segment of the first transfer, and then implicitly on all the
> rest by never clearing the bit from the value read from the ctrl0
> register.
> 
> The only reason to not set LOCK_CS would be to attempt an altered
> protocol where CS pulses between each word.  Though don't get your
> hopes up, as the hardware doesn't appear to do this in any sane
> manner.
> 
> Setting DEASSERT_CS causes CS to be de-asserted at the end of the
> transfer.  It would normally be set on the final segment of the final
> transfer.  The DMA code explicitly sets it in this case, but because
> it never clears the bit from the ctrl0 register is will remain set for
> all transfers in subsequent messages.  This results in a CS pulse
> between transfers.
> 
> There is a similar problem with the read mode bit never being cleared
> in DMA mode.
> 
> The spi transfer "cs_change" flag is ignored by the driver.
> 
> The driver passes a "first" and "last" flag to the transfer functions
> for a message, so they can know how to set these bits.  It does this
> by passing them as pointers.  There is no reason to do this, as the
> flags are not modified in the transfer function.  And since LOCK_CS
> can be set and left that way, there is no need for a "first" flag at
> all.
> 
> This patch fixes DEASSERT_CS and READ being left on in DMA mode,
> eliminates the flags being passed as separate pointers, and adds
> support for the cs_change flag.
> 
> Note that while using the cs_change flag to leave CS asserted after
> the final transfer works, getting the CS to automatically turn off
> when a different slave is addressed might not work.
> 
> Signed-off-by: Trent Piepho <tpiepho@gmail.com>
> Cc: Marek Vasut <marex@denx.de>
> Cc: Fabio Estevam <fabio.estevam@freescale.com>
> Cc: Shawn Guo <shawn.guo@linaro.org>
> ---
>  drivers/spi/spi-mxs.c |   86
> +++++++++++++++++++++---------------------------- 1 file changed, 36
> insertions(+), 50 deletions(-)
> 
> diff --git a/drivers/spi/spi-mxs.c b/drivers/spi/spi-mxs.c
> index 22a0af0..aa77d96b9 100644
> --- a/drivers/spi/spi-mxs.c
> +++ b/drivers/spi/spi-mxs.c
> @@ -58,6 +58,11 @@
> 
>  #define SG_MAXLEN		0xff00
> 
> +/* Flags for txrx functions.  More efficient that using an argument
> register for + * each one. */

Fix the comment please.

/*
 * Multiline comment should really
 * be like this.
 */

> +#define TXRX_WRITE		1	/* This is a write */
> +#define TXRX_DEASSERT_CS	2	/* De-assert CS at end of txrx */

New flags? I'm sure the GCC can optimize function parameters pretty well, esp. 
if you make the bool.

>  struct mxs_spi {
>  	struct mxs_ssp		ssp;
>  	struct completion	c;
> @@ -91,6 +96,8 @@ static int mxs_spi_setup_transfer(struct spi_device *dev,
> 
>  	mxs_ssp_set_clk_rate(ssp, hz);
> 
> +	writel(BM_SSP_CTRL0_LOCK_CS,
> +		ssp->base + HW_SSP_CTRL0 + STMP_OFFSET_REG_SET);
>  	writel(BF_SSP_CTRL1_SSP_MODE(BV_SSP_CTRL1_SSP_MODE__SPI) |
>  		     BF_SSP_CTRL1_WORD_LENGTH
>  		     (BV_SSP_CTRL1_WORD_LENGTH__EIGHT_BITS) |
> @@ -155,26 +162,6 @@ static void mxs_spi_set_cs(struct mxs_spi *spi,
> unsigned cs) writel(select, ssp->base + HW_SSP_CTRL0 +
> STMP_OFFSET_REG_SET);
>  }
> 
> -static inline void mxs_spi_enable(struct mxs_spi *spi)
> -{
> -	struct mxs_ssp *ssp = &spi->ssp;
> -
> -	writel(BM_SSP_CTRL0_LOCK_CS,
> -		ssp->base + HW_SSP_CTRL0 + STMP_OFFSET_REG_SET);
> -	writel(BM_SSP_CTRL0_IGNORE_CRC,
> -		ssp->base + HW_SSP_CTRL0 + STMP_OFFSET_REG_CLR);
> -}
> -
> -static inline void mxs_spi_disable(struct mxs_spi *spi)
> -{
> -	struct mxs_ssp *ssp = &spi->ssp;
> -
> -	writel(BM_SSP_CTRL0_LOCK_CS,
> -		ssp->base + HW_SSP_CTRL0 + STMP_OFFSET_REG_CLR);
> -	writel(BM_SSP_CTRL0_IGNORE_CRC,
> -		ssp->base + HW_SSP_CTRL0 + STMP_OFFSET_REG_SET);
> -}
> -
>  static int mxs_ssp_wait(struct mxs_spi *spi, int offset, int mask, bool
> set) {
>  	const unsigned long timeout = jiffies + msecs_to_jiffies(SSP_TIMEOUT);
> @@ -214,7 +201,7 @@ static irqreturn_t mxs_ssp_irq_handler(int irq, void
> *dev_id)
> 
>  static int mxs_spi_txrx_dma(struct mxs_spi *spi, int cs,
>  			    unsigned char *buf, int len,
> -			    int *first, int *last, int write)
> +			    unsigned int flags)
>  {
>  	struct mxs_ssp *ssp = &spi->ssp;
>  	struct dma_async_tx_descriptor *desc = NULL;
> @@ -241,20 +228,21 @@ static int mxs_spi_txrx_dma(struct mxs_spi *spi, int
> cs, INIT_COMPLETION(spi->c);
> 
>  	ctrl0 = readl(ssp->base + HW_SSP_CTRL0);
> -	ctrl0 &= ~BM_SSP_CTRL0_XFER_COUNT;
> +	ctrl0 &= ~BM_SSP_CTRL0_XFER_COUNT & ~BM_SSP_CTRL0_IGNORE_CRC &
> +		 ~BM_SSP_CTRL0_READ;

Well ... ctrl0 &= ~(FOO | BAR | BAZ); ... won't cut it?

>  	ctrl0 |= BM_SSP_CTRL0_DATA_XFER | mxs_spi_cs_to_reg(cs);
> 
> -	if (*first)
> -		ctrl0 |= BM_SSP_CTRL0_LOCK_CS;

The DMA will overwrite ctrl0 in each turn, did you try very long (>64kB long) 
transfers with your adjustments? To test this, you can try

$ dd if=/dev/mtdN of=/dev/null bs=1M

on sufficiently large SPI flash supporting the FAST_READ opcode.

> -	if (!write)
> +	if (!(flags & TXRX_WRITE))
>  		ctrl0 |= BM_SSP_CTRL0_READ;
> 
>  	/* Queue the DMA data transfer. */
>  	for (sg_count = 0; sg_count < sgs; sg_count++) {
> +		/* Prepare the transfer descriptor. */
>  		min = min(len, desc_len);
> 
> -		/* Prepare the transfer descriptor. */
> -		if ((sg_count + 1 == sgs) && *last)
> +		/* De-assert CS on last segment if flag is set (i.e., no more
> +		 * transfers will follow) */

Wrong multi-line comment. See Documentation/CodingStyle chapter 8.

> +		if ((sg_count + 1 == sgs) && (flags & TXRX_DEASSERT_CS))
>  			ctrl0 |= BM_SSP_CTRL0_IGNORE_CRC;
> 
>  		if (ssp->devid == IMX23_SSP) {
> @@ -279,7 +267,7 @@ static int mxs_spi_txrx_dma(struct mxs_spi *spi, int
> cs,
> 
>  		sg_init_one(&dma_xfer[sg_count].sg, sg_buf, min);
>  		ret = dma_map_sg(ssp->dev, &dma_xfer[sg_count].sg, 1,
> -			write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
> +			(flags & TXRX_WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);

I think this is unneeded.

>  		len -= min;
>  		buf += min;
> @@ -299,7 +287,7 @@ static int mxs_spi_txrx_dma(struct mxs_spi *spi, int
> cs,
> 
>  		desc = dmaengine_prep_slave_sg(ssp->dmach,
>  				&dma_xfer[sg_count].sg, 1,
> -				write ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM,
> +				(flags & TXRX_WRITE) ? DMA_MEM_TO_DEV : 
DMA_DEV_TO_MEM,
>  				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
> 
>  		if (!desc) {
> @@ -336,7 +324,7 @@ err_vmalloc:
>  	while (--sg_count >= 0) {
>  err_mapped:
>  		dma_unmap_sg(ssp->dev, &dma_xfer[sg_count].sg, 1,
> -			write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
> +			(flags & TXRX_WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
>  	}
> 
>  	kfree(dma_xfer);
> @@ -346,18 +334,20 @@ err_mapped:
> 
>  static int mxs_spi_txrx_pio(struct mxs_spi *spi, int cs,
>  			    unsigned char *buf, int len,
> -			    int *first, int *last, int write)
> +			    unsigned int flags)
>  {
>  	struct mxs_ssp *ssp = &spi->ssp;
> 
> -	if (*first)
> -		mxs_spi_enable(spi);
> +	/* Clear this now, set it on last transfer if needed */
> +	writel(BM_SSP_CTRL0_IGNORE_CRC,
> +	       ssp->base + HW_SSP_CTRL0 + STMP_OFFSET_REG_CLR);
> 
>  	mxs_spi_set_cs(spi, cs);
> 
>  	while (len--) {
> -		if (*last && len == 0)
> -			mxs_spi_disable(spi);
> +		if (len == 0 && (flags & TXRX_DEASSERT_CS))
> +			writel(BM_SSP_CTRL0_IGNORE_CRC,
> +			       ssp->base + HW_SSP_CTRL0 + STMP_OFFSET_REG_SET);

Ok, I'll stop here. You mixed two kinds of changes here:
1) The "fixup" of the flags
2) The adjustments to handling the IGNORE_CRC and LOCK_CS

Split the patch please.

------------------------------------------------------------------------------
Own the Future-Intel&reg; Level Up Game Demo Contest 2013
Rise to greatness in Intel's independent game demo contest.
Compete for recognition, cash, and the chance to get your game 
on Steam. $5K grand prize plus 10 genre and skill prizes. 
Submit your demo by 6/6/13. http://p.sf.net/sfu/intel_levelupd2d
Trent Piepho April 2, 2013, 1:24 a.m. UTC | #2
On Mon, Apr 1, 2013 at 4:11 PM, Marek Vasut <marex@denx.de> wrote:
>> +#define TXRX_WRITE           1       /* This is a write */
>> +#define TXRX_DEASSERT_CS     2       /* De-assert CS at end of txrx */
>
> New flags? I'm sure the GCC can optimize function parameters pretty well, esp.
> if you make the bool.

Nope.  I checked the actual compiled code.  Each bool takes another
parameter.  Since the txrx functions are large and called from
multiple sites, they are not inlined.  Gcc is not able to combine all
the bool arguments into one single bitfield argument.

On arm, to check if an argument on the stack is true requires two
instructions.  First an ldr to move the argument into a register and
then a cmp to check it for zero.  Checking for a bit is the basically
the same, first an ldr and then a tst.  So there is no performance
advantage of "if(x)" vs "if(x&32)".  But, if one uses bit flags then
the same flag word can be used for every flag, avoiding the need to
load a different word for each flag.

So using the flags avoids extra instructions in the caller for each
call to place each flag in its own boolean on the stack or in an
argument register (depending on the # of function arguments), and then
avoids extra code in the callee to load any arguments on the stack
into a register.  It can make code in loops faster and smaller by
using fewer register and avoiding a register load inside the loop.

BTW, after converting the code to bool arguments instead of flags:
add/remove: 0/0 grow/shrink: 4/0 up/down: 130/0 (130)
function                                     old     new   delta
mxs_spi_transfer_one                         808     900     +92
mxs_spi_txrx_pio                             492     508     +16
mxs_spi_txrx_dma                            1188    1204     +16
__UNIQUE_ID_vermagic0                         73      79      +6

Bool arguments for each flag is larger than using one argument and bit
flags.  And there are more flags needed to support things like GPIO CS
lines.

>>       ctrl0 = readl(ssp->base + HW_SSP_CTRL0);
>> -     ctrl0 &= ~BM_SSP_CTRL0_XFER_COUNT;
>> +     ctrl0 &= ~BM_SSP_CTRL0_XFER_COUNT & ~BM_SSP_CTRL0_IGNORE_CRC &
>> +              ~BM_SSP_CTRL0_READ;
>
> Well ... ctrl0 &= ~(FOO | BAR | BAZ); ... won't cut it?

Well, by De Morgan's law the two expressions are the same.  And they
are the same number of characters.  And both will produce a compile
time constant and thus the same code.  However, I like the ~FOO & ~BAR
& ~BAR form better as there is greater parallelism between each term
of the expression.

>>       ctrl0 |= BM_SSP_CTRL0_DATA_XFER | mxs_spi_cs_to_reg(cs);
>>
>> -     if (*first)
>> -             ctrl0 |= BM_SSP_CTRL0_LOCK_CS;
>
> The DMA will overwrite ctrl0 in each turn, did you try very long (>64kB long)
> transfers with your adjustments? To test this, you can try
>
> $ dd if=/dev/mtdN of=/dev/null bs=1M
>
> on sufficiently large SPI flash supporting the FAST_READ opcode.

Don't have SPI flash.  I did test with logic analyzer and the current
code is clearly broken and my changes clearly fix it.  It should be
obvious from looking a the code that it is wrong.  The DMA does write
ctrl0 on each segment of each transfer, but look at the previous
paragraph to see where it comes from.  Once READ or IGNORE_CRC are
set, nothing will clear them until PIO mode is used.  It's the same
with the bug fix for 3.9 that clears the XFER_COUNT bits in DMA mode.
Strangely, I didn't notice that bug since all my xfers are the same
length!  But I do switch between slaves, between read and write, and
use messages with multiple transfers, in DMA mode, all of which are
broken in the current driver.

>> +             if ((sg_count + 1 == sgs) && (flags & TXRX_DEASSERT_CS))
>>                       ctrl0 |= BM_SSP_CTRL0_IGNORE_CRC;
>>
>>               if (ssp->devid == IMX23_SSP) {
>> @@ -279,7 +267,7 @@ static int mxs_spi_txrx_dma(struct mxs_spi *spi, int
>> cs,
>>
>>               sg_init_one(&dma_xfer[sg_count].sg, sg_buf, min);
>>               ret = dma_map_sg(ssp->dev, &dma_xfer[sg_count].sg, 1,
>> -                     write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
>> +                     (flags & TXRX_WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
>
> I think this is unneeded.

Whatis ?  Setting the DMA direction or using flags?

Look at it this way:

Existing code that uses the first/last flags is wrong and needs to be
changed.  Therefor code using 'first' and 'last' will be changed.

Passing the flags as pointers is bad practice and makes no sense to
do.  Does it make any sense to re-write code fix the way it uses first
and last, then re-write those same lines a second time to not use
pointers?

If the way the flags are passed should change, then why not do it the
most efficient way.  It isn't any more complicated and produces better
code.  One flag per parameter becomes cumbersome when more flags are
added for additional features.

I'm all for splitting my patches up.  I sent five patches when I bet 9
out 10 developers would have just done one.  But I don't think it
makes any sense to re-write the same line of code over and over again
in successive patches in a series before getting to the final version.
 It just makes more churn to review.

I hate it when I get a series of patches and spot a flaw, take the
time to write up the flaw and how it should be fixed, only to discover
that the flaw is fixed later on in the series and I wasted my time.

------------------------------------------------------------------------------
Own the Future-Intel(R) Level Up Game Demo Contest 2013
Rise to greatness in Intel's independent game demo contest. Compete 
for recognition, cash, and the chance to get your game on Steam. 
$5K grand prize plus 10 genre and skill prizes. Submit your demo 
by 6/6/13. http://altfarm.mediaplex.com/ad/ck/12124-176961-30367-2
Marek Vasut April 2, 2013, 4:22 a.m. UTC | #3
Dear Trent Piepho,

> On Mon, Apr 1, 2013 at 4:11 PM, Marek Vasut <marex@denx.de> wrote:
> >> +#define TXRX_WRITE           1       /* This is a write */
> >> +#define TXRX_DEASSERT_CS     2       /* De-assert CS at end of txrx */
> > 
> > New flags? I'm sure the GCC can optimize function parameters pretty well,
> > esp. if you make the bool.
> 
> Nope.  I checked the actual compiled code.  Each bool takes another
> parameter.  Since the txrx functions are large and called from
> multiple sites, they are not inlined.  Gcc is not able to combine all
> the bool arguments into one single bitfield argument.

That's pretty poor, oh well.

btw. is it necessary to define new flags or is it possible to reuse some?

[...]

> >>       ctrl0 = readl(ssp->base + HW_SSP_CTRL0);
> >> 
> >> -     ctrl0 &= ~BM_SSP_CTRL0_XFER_COUNT;
> >> +     ctrl0 &= ~BM_SSP_CTRL0_XFER_COUNT & ~BM_SSP_CTRL0_IGNORE_CRC &
> >> +              ~BM_SSP_CTRL0_READ;
> > 
> > Well ... ctrl0 &= ~(FOO | BAR | BAZ); ... won't cut it?
> 
> Well, by De Morgan's law the two expressions are the same.  And they
> are the same number of characters.  And both will produce a compile
> time constant and thus the same code.  However, I like the ~FOO & ~BAR
> & ~BAR form better as there is greater parallelism between each term
> of the expression.

What about the law of readability of code ? ;-)

> >>       ctrl0 |= BM_SSP_CTRL0_DATA_XFER | mxs_spi_cs_to_reg(cs);
> >> 
> >> -     if (*first)
> >> -             ctrl0 |= BM_SSP_CTRL0_LOCK_CS;
> > 
> > The DMA will overwrite ctrl0 in each turn, did you try very long (>64kB
> > long) transfers with your adjustments? To test this, you can try
> > 
> > $ dd if=/dev/mtdN of=/dev/null bs=1M
> > 
> > on sufficiently large SPI flash supporting the FAST_READ opcode.
> 
> Don't have SPI flash.  I did test with logic analyzer and the current
> code is clearly broken and my changes clearly fix it.

Good, but you clearly didn't test it in the most used case. This will clearly 
piss off a lot of people if you break something. And this will clearly not be 
good ;-)

I'm clearly planning to test the code, but only once it's clearly possible to 
tell apart churn from the actual fix (see my other rambling, just reminding 
you). So I'll wait for V2 and give it a go on a large flash.

> It should be obvious from looking a the code that it is wrong.

Sorry, it is not obvious.

> The DMA does write
> ctrl0 on each segment of each transfer, but look at the previous
> paragraph to see where it comes from.  Once READ or IGNORE_CRC are
> set, nothing will clear them until PIO mode is used.

When using a flash, the DMA and PIO mode usage is intermixed. Usually, the PIO 
is used to program the command and then DMA to transmit the payload. It's hardly 
ever PIO only orr DMA only for the whole transfer.

> It's the same
> with the bug fix for 3.9 that clears the XFER_COUNT bits in DMA mode.
> Strangely, I didn't notice that bug since all my xfers are the same
> length!  But I do switch between slaves, between read and write, and
> use messages with multiple transfers, in DMA mode, all of which are
> broken in the current driver.

btw. are you using MX23 or MX28 to test this?

> >> +             if ((sg_count + 1 == sgs) && (flags & TXRX_DEASSERT_CS))
> >> 
> >>                       ctrl0 |= BM_SSP_CTRL0_IGNORE_CRC;
> >>               
> >>               if (ssp->devid == IMX23_SSP) {
> >> 
> >> @@ -279,7 +267,7 @@ static int mxs_spi_txrx_dma(struct mxs_spi *spi, int
> >> cs,
> >> 
> >>               sg_init_one(&dma_xfer[sg_count].sg, sg_buf, min);
> >>               ret = dma_map_sg(ssp->dev, &dma_xfer[sg_count].sg, 1,
> >> 
> >> -                     write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
> >> +                     (flags & TXRX_WRITE) ? DMA_TO_DEVICE :
> >> DMA_FROM_DEVICE);
> > 
> > I think this is unneeded.
> 
> Whatis ?  Setting the DMA direction or using flags?
> 
> Look at it this way:
> 
> Existing code that uses the first/last flags is wrong and needs to be
> changed.  Therefor code using 'first' and 'last' will be changed.
> 
> Passing the flags as pointers is bad practice and makes no sense to
> do.  Does it make any sense to re-write code fix the way it uses first
> and last, then re-write those same lines a second time to not use
> pointers?

You can always flip it the other way -- first rework the use of flags, then 
apply the fix. It'd make the patch much easier to understand, don't you think?

> If the way the flags are passed should change, then why not do it the
> most efficient way.  It isn't any more complicated and produces better
> code.  One flag per parameter becomes cumbersome when more flags are
> added for additional features.
> 
> I'm all for splitting my patches up.  I sent five patches when I bet 9
> out 10 developers would have just done one.  But I don't think it
> makes any sense to re-write the same line of code over and over again
> in successive patches in a series before getting to the final version.
>  It just makes more churn to review.

Splitting the patchset to make it more understandable is OK though. And I'm 
really getting lost in this patch.

> I hate it when I get a series of patches and spot a flaw, take the
> time to write up the flaw and how it should be fixed, only to discover
> that the flaw is fixed later on in the series and I wasted my time.

Best regards,
Marek Vasut

------------------------------------------------------------------------------
Own the Future-Intel(R) Level Up Game Demo Contest 2013
Rise to greatness in Intel's independent game demo contest. Compete 
for recognition, cash, and the chance to get your game on Steam. 
$5K grand prize plus 10 genre and skill prizes. Submit your demo 
by 6/6/13. http://altfarm.mediaplex.com/ad/ck/12124-176961-30367-2
Trent Piepho April 2, 2013, 7:11 a.m. UTC | #4
On Mon, Apr 1, 2013 at 9:22 PM, Marek Vasut <marex@denx.de> wrote:
>> On Mon, Apr 1, 2013 at 4:11 PM, Marek Vasut <marex@denx.de> wrote:
>> >> +#define TXRX_WRITE           1       /* This is a write */
>> >> +#define TXRX_DEASSERT_CS     2       /* De-assert CS at end of txrx */
>
> btw. is it necessary to define new flags or is it possible to reuse some?

I don't see any others that would make sense.  The driver doesn't
define any flags at all.  The spi layer doesn't have ones that match.
And this is for a private static interface inside the driver.  It
wouldn't be right to co-opt flags defined for some other purpose.

>> >>       ctrl0 = readl(ssp->base + HW_SSP_CTRL0);
>> >>
>> >> -     ctrl0 &= ~BM_SSP_CTRL0_XFER_COUNT;
>> >> +     ctrl0 &= ~BM_SSP_CTRL0_XFER_COUNT & ~BM_SSP_CTRL0_IGNORE_CRC &
>> >> +              ~BM_SSP_CTRL0_READ;
>> >
>> > Well ... ctrl0 &= ~(FOO | BAR | BAZ); ... won't cut it?
>>
>> Well, by De Morgan's law the two expressions are the same.  And they
>> are the same number of characters.  And both will produce a compile
>> time constant and thus the same code.  However, I like the ~FOO & ~BAR
>> & ~BAR form better as there is greater parallelism between each term
>> of the expression.
>
> What about the law of readability of code ? ;-)

Don't see anything in CodingStyle that one should be preferred over
the other.  I think this way is more readable than the other.
Generally you can think of masking off bits via bitwise-and and
setting bits with bitwise-or.  So if you want to do a sequence of
masks, then using a sequence of bitwise-ands is the most readable and
straightforward code IMHO.  Combing bits to mask with bitwise-or, then
inverting the set and masking with that seems like a less clear way of
doing the same thing.  And this way the existing tokens aren't
modified, so there is less churn!

>> The DMA does write
>> ctrl0 on each segment of each transfer, but look at the previous
>> paragraph to see where it comes from.  Once READ or IGNORE_CRC are
>> set, nothing will clear them until PIO mode is used.
>
> When using a flash, the DMA and PIO mode usage is intermixed. Usually, the PIO
> is used to program the command and then DMA to transmit the payload. It's hardly
> ever PIO only orr DMA only for the whole transfer.

Probably why the problem hasn't been noticed.  It's obvious from a
cursory examination of the driver that bits in ctrl0 are only SET in
the DMA code, never cleared.  The PIO code does clear them.

>> It's the same
>> with the bug fix for 3.9 that clears the XFER_COUNT bits in DMA mode.
>> Strangely, I didn't notice that bug since all my xfers are the same
>> length!  But I do switch between slaves, between read and write, and
>> use messages with multiple transfers, in DMA mode, all of which are
>> broken in the current driver.
>
> btw. are you using MX23 or MX28 to test this?

IMX28.

>> Existing code that uses the first/last flags is wrong and needs to be
>> changed.  Therefor code using 'first' and 'last' will be changed.
>>
>> Passing the flags as pointers is bad practice and makes no sense to
>> do.  Does it make any sense to re-write code fix the way it uses first
>> and last, then re-write those same lines a second time to not use
>> pointers?
>
> You can always flip it the other way -- first rework the use of flags, then
> apply the fix. It'd make the patch much easier to understand, don't you think?

So I should change 'first' to not be a pointer to an integer in one
patch, then delete the flag entirely in another patch?  What's the
point of changing code just to delete it immediately afterward?  I'd
call that useless churn.

It also makes a patch series a PITA to deal with, as a change to
intermediate patch 1 creates a conflict when trying to apply
intermediate patch 2, 3, 4 and so on.  Instead of spending 5 mins
creating V2 of one patch that needs a change, you have to speed an
hour fixing V2 of an entire series.  And that problem is only caused
by trying to create the extra intermediate stages, that no one
actually cares about and will never use, with various known flaws that
are already fixed.

------------------------------------------------------------------------------
Own the Future-Intel(R) Level Up Game Demo Contest 2013
Rise to greatness in Intel's independent game demo contest. Compete 
for recognition, cash, and the chance to get your game on Steam. 
$5K grand prize plus 10 genre and skill prizes. Submit your demo 
by 6/6/13. http://altfarm.mediaplex.com/ad/ck/12124-176961-30367-2
Marek Vasut April 2, 2013, 10:32 a.m. UTC | #5
Dear Trent Piepho,

> On Mon, Apr 1, 2013 at 9:22 PM, Marek Vasut <marex@denx.de> wrote:
> >> On Mon, Apr 1, 2013 at 4:11 PM, Marek Vasut <marex@denx.de> wrote:
> >> >> +#define TXRX_WRITE           1       /* This is a write */
> >> >> +#define TXRX_DEASSERT_CS     2       /* De-assert CS at end of txrx
> >> >> */
> > 
> > btw. is it necessary to define new flags or is it possible to reuse some?
> 
> I don't see any others that would make sense.  The driver doesn't
> define any flags at all.  The spi layer doesn't have ones that match.
> And this is for a private static interface inside the driver.  It
> wouldn't be right to co-opt flags defined for some other purpose.

Ok, makes sense then.

btw. if defining bitfield flags, it makes more sense to use (1 << n) notation to 
make it more readable and also less prone to people using the actual flag value 
as a bitshift argument.

> >> >>       ctrl0 = readl(ssp->base + HW_SSP_CTRL0);
> >> >> 
> >> >> -     ctrl0 &= ~BM_SSP_CTRL0_XFER_COUNT;
> >> >> +     ctrl0 &= ~BM_SSP_CTRL0_XFER_COUNT & ~BM_SSP_CTRL0_IGNORE_CRC &
> >> >> +              ~BM_SSP_CTRL0_READ;
> >> > 
> >> > Well ... ctrl0 &= ~(FOO | BAR | BAZ); ... won't cut it?
> >> 
> >> Well, by De Morgan's law the two expressions are the same.  And they
> >> are the same number of characters.  And both will produce a compile
> >> time constant and thus the same code.  However, I like the ~FOO & ~BAR
> >> & ~BAR form better as there is greater parallelism between each term
> >> of the expression.
> > 
> > What about the law of readability of code ? ;-)
> 
> Don't see anything in CodingStyle that one should be preferred over
> the other.

There ain't any I'm aware of, but to paraphrase you, let's keep the format 
that's already used in the driver ;-) :

114         if (dev->mode & ~(SPI_CPOL | SPI_CPHA))

> I think this way is more readable than the other.
> Generally you can think of masking off bits via bitwise-and and
> setting bits with bitwise-or.  So if you want to do a sequence of
> masks, then using a sequence of bitwise-ands is the most readable and
> straightforward code IMHO.  Combing bits to mask with bitwise-or, then
> inverting the set and masking with that seems like a less clear way of
> doing the same thing.  And this way the existing tokens aren't
> modified, so there is less churn!
> 
> >> The DMA does write
> >> ctrl0 on each segment of each transfer, but look at the previous
> >> paragraph to see where it comes from.  Once READ or IGNORE_CRC are
> >> set, nothing will clear them until PIO mode is used.
> > 
> > When using a flash, the DMA and PIO mode usage is intermixed. Usually,
> > the PIO is used to program the command and then DMA to transmit the
> > payload. It's hardly ever PIO only orr DMA only for the whole transfer.
> 
> Probably why the problem hasn't been noticed.  It's obvious from a
> cursory examination of the driver that bits in ctrl0 are only SET in
> the DMA code, never cleared.  The PIO code does clear them.
> 
> >> It's the same
> >> with the bug fix for 3.9 that clears the XFER_COUNT bits in DMA mode.
> >> Strangely, I didn't notice that bug since all my xfers are the same
> >> length!  But I do switch between slaves, between read and write, and
> >> use messages with multiple transfers, in DMA mode, all of which are
> >> broken in the current driver.
> > 
> > btw. are you using MX23 or MX28 to test this?
> 
> IMX28.

Is that any mainline board?

> >> Existing code that uses the first/last flags is wrong and needs to be
> >> changed.  Therefor code using 'first' and 'last' will be changed.
> >> 
> >> Passing the flags as pointers is bad practice and makes no sense to
> >> do.  Does it make any sense to re-write code fix the way it uses first
> >> and last, then re-write those same lines a second time to not use
> >> pointers?
> > 
> > You can always flip it the other way -- first rework the use of flags,
> > then apply the fix. It'd make the patch much easier to understand, don't
> > you think?
> 
> So I should change 'first' to not be a pointer to an integer in one
> patch, then delete the flag entirely in another patch?

I'd say make a patch (0001) that implements the transition to using your newly 
defined flags and then make a patch (0002) that "Fix extra CS pulses and read 
mode in multi-transfer messages". One patch shall do one thing, that's the usual 
rule of the thumb. Obviously keep them in a series if these patches shall go in 
together. And why doesn't squashing them all together work you might ask -- 
because reviewing the result is hard.

[...]

Best regards,
Marek Vasut

------------------------------------------------------------------------------
Own the Future-Intel(R) Level Up Game Demo Contest 2013
Rise to greatness in Intel's independent game demo contest. Compete 
for recognition, cash, and the chance to get your game on Steam. 
$5K grand prize plus 10 genre and skill prizes. Submit your demo 
by 6/6/13. http://altfarm.mediaplex.com/ad/ck/12124-176961-30367-2
Trent Piepho April 2, 2013, 12:40 p.m. UTC | #6
On Tue, Apr 2, 2013 at 3:32 AM, Marek Vasut <marex@denx.de> wrote:
>>
>> Don't see anything in CodingStyle that one should be preferred over
>> the other.
>
> There ain't any I'm aware of, but to paraphrase you, let's keep the format
> that's already used in the driver ;-) :
>
> 114         if (dev->mode & ~(SPI_CPOL | SPI_CPHA))

Thanks for pointing that out, as that code can be deleted.  Which
means I don't need to be consistent with it anymore and can create a
new standard, to be followed henceforth, now and forever.

But it looks like the or method is more common in the kernel code.  So
I'll change it in V2.

>> > btw. are you using MX23 or MX28 to test this?
>>
>> IMX28.
>
> Is that any mainline board?

Nope, custom hardware.

>> >> Existing code that uses the first/last flags is wrong and needs to be
>> >> changed.  Therefor code using 'first' and 'last' will be changed.
>> >>
>> >> Passing the flags as pointers is bad practice and makes no sense to
>> >> do.  Does it make any sense to re-write code fix the way it uses first
>> >> and last, then re-write those same lines a second time to not use
>> >> pointers?
>> >
>> > You can always flip it the other way -- first rework the use of flags,
>> > then apply the fix. It'd make the patch much easier to understand, don't
>> > you think?
>>
>> So I should change 'first' to not be a pointer to an integer in one
>> patch, then delete the flag entirely in another patch?
>
> I'd say make a patch (0001) that implements the transition to using your newly
> defined flags and then make a patch (0002) that "Fix extra CS pulses and read
> mode in multi-transfer messages". One patch shall do one thing, that's the usual
> rule of the thumb. Obviously keep them in a series if these patches shall go in
> together. And why doesn't squashing them all together work you might ask --
> because reviewing the result is hard.

5 patches have now been split into 12.

------------------------------------------------------------------------------
Own the Future-Intel(R) Level Up Game Demo Contest 2013
Rise to greatness in Intel's independent game demo contest. Compete 
for recognition, cash, and the chance to get your game on Steam. 
$5K grand prize plus 10 genre and skill prizes. Submit your demo 
by 6/6/13. http://altfarm.mediaplex.com/ad/ck/12124-176961-30367-2
Marek Vasut April 2, 2013, 11:39 p.m. UTC | #7
Dear Trent Piepho,

> On Tue, Apr 2, 2013 at 3:32 AM, Marek Vasut <marex@denx.de> wrote:
> >> Don't see anything in CodingStyle that one should be preferred over
> >> the other.
> > 
> > There ain't any I'm aware of, but to paraphrase you, let's keep the
> > format that's already used in the driver ;-) :
> > 
> > 114         if (dev->mode & ~(SPI_CPOL | SPI_CPHA))
> 
> Thanks for pointing that out, as that code can be deleted.  Which
> means I don't need to be consistent with it anymore and can create a
> new standard, to be followed henceforth, now and forever.
> 
> But it looks like the or method is more common in the kernel code.  So
> I'll change it in V2.
> 
> >> > btw. are you using MX23 or MX28 to test this?
> >> 
> >> IMX28.
> > 
> > Is that any mainline board?
> 
> Nope, custom hardware.
> 
> >> >> Existing code that uses the first/last flags is wrong and needs to be
> >> >> changed.  Therefor code using 'first' and 'last' will be changed.
> >> >> 
> >> >> Passing the flags as pointers is bad practice and makes no sense to
> >> >> do.  Does it make any sense to re-write code fix the way it uses
> >> >> first and last, then re-write those same lines a second time to not
> >> >> use pointers?
> >> > 
> >> > You can always flip it the other way -- first rework the use of flags,
> >> > then apply the fix. It'd make the patch much easier to understand,
> >> > don't you think?
> >> 
> >> So I should change 'first' to not be a pointer to an integer in one
> >> patch, then delete the flag entirely in another patch?
> > 
> > I'd say make a patch (0001) that implements the transition to using your
> > newly defined flags and then make a patch (0002) that "Fix extra CS
> > pulses and read mode in multi-transfer messages". One patch shall do one
> > thing, that's the usual rule of the thumb. Obviously keep them in a
> > series if these patches shall go in together. And why doesn't squashing
> > them all together work you might ask -- because reviewing the result is
> > hard.
> 
> 5 patches have now been split into 12.

Much better, yes.

Best regards,
Marek Vasut

------------------------------------------------------------------------------
Minimize network downtime and maximize team effectiveness.
Reduce network management and security costs.Learn how to hire 
the most talented Cisco Certified professionals. Visit the 
Employer Resources Portal
http://www.cisco.com/web/learning/employer_resources/index.html
diff mbox

Patch

diff --git a/drivers/spi/spi-mxs.c b/drivers/spi/spi-mxs.c
index 22a0af0..aa77d96b9 100644
--- a/drivers/spi/spi-mxs.c
+++ b/drivers/spi/spi-mxs.c
@@ -58,6 +58,11 @@ 
 
 #define SG_MAXLEN		0xff00
 
+/* Flags for txrx functions.  More efficient that using an argument register for
+ * each one. */
+#define TXRX_WRITE		1	/* This is a write */
+#define TXRX_DEASSERT_CS	2	/* De-assert CS at end of txrx */
+
 struct mxs_spi {
 	struct mxs_ssp		ssp;
 	struct completion	c;
@@ -91,6 +96,8 @@  static int mxs_spi_setup_transfer(struct spi_device *dev,
 
 	mxs_ssp_set_clk_rate(ssp, hz);
 
+	writel(BM_SSP_CTRL0_LOCK_CS,
+		ssp->base + HW_SSP_CTRL0 + STMP_OFFSET_REG_SET);
 	writel(BF_SSP_CTRL1_SSP_MODE(BV_SSP_CTRL1_SSP_MODE__SPI) |
 		     BF_SSP_CTRL1_WORD_LENGTH
 		     (BV_SSP_CTRL1_WORD_LENGTH__EIGHT_BITS) |
@@ -155,26 +162,6 @@  static void mxs_spi_set_cs(struct mxs_spi *spi, unsigned cs)
 	writel(select, ssp->base + HW_SSP_CTRL0 + STMP_OFFSET_REG_SET);
 }
 
-static inline void mxs_spi_enable(struct mxs_spi *spi)
-{
-	struct mxs_ssp *ssp = &spi->ssp;
-
-	writel(BM_SSP_CTRL0_LOCK_CS,
-		ssp->base + HW_SSP_CTRL0 + STMP_OFFSET_REG_SET);
-	writel(BM_SSP_CTRL0_IGNORE_CRC,
-		ssp->base + HW_SSP_CTRL0 + STMP_OFFSET_REG_CLR);
-}
-
-static inline void mxs_spi_disable(struct mxs_spi *spi)
-{
-	struct mxs_ssp *ssp = &spi->ssp;
-
-	writel(BM_SSP_CTRL0_LOCK_CS,
-		ssp->base + HW_SSP_CTRL0 + STMP_OFFSET_REG_CLR);
-	writel(BM_SSP_CTRL0_IGNORE_CRC,
-		ssp->base + HW_SSP_CTRL0 + STMP_OFFSET_REG_SET);
-}
-
 static int mxs_ssp_wait(struct mxs_spi *spi, int offset, int mask, bool set)
 {
 	const unsigned long timeout = jiffies + msecs_to_jiffies(SSP_TIMEOUT);
@@ -214,7 +201,7 @@  static irqreturn_t mxs_ssp_irq_handler(int irq, void *dev_id)
 
 static int mxs_spi_txrx_dma(struct mxs_spi *spi, int cs,
 			    unsigned char *buf, int len,
-			    int *first, int *last, int write)
+			    unsigned int flags)
 {
 	struct mxs_ssp *ssp = &spi->ssp;
 	struct dma_async_tx_descriptor *desc = NULL;
@@ -241,20 +228,21 @@  static int mxs_spi_txrx_dma(struct mxs_spi *spi, int cs,
 	INIT_COMPLETION(spi->c);
 
 	ctrl0 = readl(ssp->base + HW_SSP_CTRL0);
-	ctrl0 &= ~BM_SSP_CTRL0_XFER_COUNT;
+	ctrl0 &= ~BM_SSP_CTRL0_XFER_COUNT & ~BM_SSP_CTRL0_IGNORE_CRC &
+		 ~BM_SSP_CTRL0_READ;
 	ctrl0 |= BM_SSP_CTRL0_DATA_XFER | mxs_spi_cs_to_reg(cs);
 
-	if (*first)
-		ctrl0 |= BM_SSP_CTRL0_LOCK_CS;
-	if (!write)
+	if (!(flags & TXRX_WRITE))
 		ctrl0 |= BM_SSP_CTRL0_READ;
 
 	/* Queue the DMA data transfer. */
 	for (sg_count = 0; sg_count < sgs; sg_count++) {
+		/* Prepare the transfer descriptor. */
 		min = min(len, desc_len);
 
-		/* Prepare the transfer descriptor. */
-		if ((sg_count + 1 == sgs) && *last)
+		/* De-assert CS on last segment if flag is set (i.e., no more
+		 * transfers will follow) */
+		if ((sg_count + 1 == sgs) && (flags & TXRX_DEASSERT_CS))
 			ctrl0 |= BM_SSP_CTRL0_IGNORE_CRC;
 
 		if (ssp->devid == IMX23_SSP) {
@@ -279,7 +267,7 @@  static int mxs_spi_txrx_dma(struct mxs_spi *spi, int cs,
 
 		sg_init_one(&dma_xfer[sg_count].sg, sg_buf, min);
 		ret = dma_map_sg(ssp->dev, &dma_xfer[sg_count].sg, 1,
-			write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+			(flags & TXRX_WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 
 		len -= min;
 		buf += min;
@@ -299,7 +287,7 @@  static int mxs_spi_txrx_dma(struct mxs_spi *spi, int cs,
 
 		desc = dmaengine_prep_slave_sg(ssp->dmach,
 				&dma_xfer[sg_count].sg, 1,
-				write ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM,
+				(flags & TXRX_WRITE) ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM,
 				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 
 		if (!desc) {
@@ -336,7 +324,7 @@  err_vmalloc:
 	while (--sg_count >= 0) {
 err_mapped:
 		dma_unmap_sg(ssp->dev, &dma_xfer[sg_count].sg, 1,
-			write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+			(flags & TXRX_WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 	}
 
 	kfree(dma_xfer);
@@ -346,18 +334,20 @@  err_mapped:
 
 static int mxs_spi_txrx_pio(struct mxs_spi *spi, int cs,
 			    unsigned char *buf, int len,
-			    int *first, int *last, int write)
+			    unsigned int flags)
 {
 	struct mxs_ssp *ssp = &spi->ssp;
 
-	if (*first)
-		mxs_spi_enable(spi);
+	/* Clear this now, set it on last transfer if needed */
+	writel(BM_SSP_CTRL0_IGNORE_CRC,
+	       ssp->base + HW_SSP_CTRL0 + STMP_OFFSET_REG_CLR);
 
 	mxs_spi_set_cs(spi, cs);
 
 	while (len--) {
-		if (*last && len == 0)
-			mxs_spi_disable(spi);
+		if (len == 0 && (flags & TXRX_DEASSERT_CS))
+			writel(BM_SSP_CTRL0_IGNORE_CRC,
+			       ssp->base + HW_SSP_CTRL0 + STMP_OFFSET_REG_SET);
 
 		if (ssp->devid == IMX23_SSP) {
 			writel(BM_SSP_CTRL0_XFER_COUNT,
@@ -368,7 +358,7 @@  static int mxs_spi_txrx_pio(struct mxs_spi *spi, int cs,
 			writel(1, ssp->base + HW_SSP_XFER_SIZE);
 		}
 
-		if (write)
+		if (flags & TXRX_WRITE)
 			writel(BM_SSP_CTRL0_READ,
 				ssp->base + HW_SSP_CTRL0 + STMP_OFFSET_REG_CLR);
 		else
@@ -381,13 +371,13 @@  static int mxs_spi_txrx_pio(struct mxs_spi *spi, int cs,
 		if (mxs_ssp_wait(spi, HW_SSP_CTRL0, BM_SSP_CTRL0_RUN, 1))
 			return -ETIMEDOUT;
 
-		if (write)
+		if (flags & TXRX_WRITE)
 			writel(*buf, ssp->base + HW_SSP_DATA(ssp));
 
 		writel(BM_SSP_CTRL0_DATA_XFER,
 			     ssp->base + HW_SSP_CTRL0 + STMP_OFFSET_REG_SET);
 
-		if (!write) {
+		if (!(flags & TXRX_WRITE)) {
 			if (mxs_ssp_wait(spi, HW_SSP_STATUS(ssp),
 						BM_SSP_STATUS_FIFO_EMPTY, 0))
 				return -ETIMEDOUT;
@@ -412,13 +402,11 @@  static int mxs_spi_transfer_one(struct spi_master *master,
 {
 	struct mxs_spi *spi = spi_master_get_devdata(master);
 	struct mxs_ssp *ssp = &spi->ssp;
-	int first, last;
 	struct spi_transfer *t, *tmp_t;
+	unsigned int flag;
 	int status = 0;
 	int cs;
 
-	first = last = 0;
-
 	cs = m->spi->chip_select;
 
 	list_for_each_entry_safe(t, tmp_t, &m->transfers, transfer_list) {
@@ -427,10 +415,9 @@  static int mxs_spi_transfer_one(struct spi_master *master,
 		if (status)
 			break;
 
-		if (&t->transfer_list == m->transfers.next)
-			first = 1;
-		if (&t->transfer_list == m->transfers.prev)
-			last = 1;
+		/* De-assert on last transfer, inverted by cs_change flag */
+		flag = (&t->transfer_list == m->transfers.prev) ^ t->cs_change ?
+		       TXRX_DEASSERT_CS : 0;
 		if ((t->rx_buf && t->tx_buf) || (t->rx_dma && t->tx_dma)) {
 			dev_err(ssp->dev,
 				"Cannot send and receive simultaneously\n");
@@ -455,11 +442,11 @@  static int mxs_spi_transfer_one(struct spi_master *master,
 			if (t->tx_buf)
 				status = mxs_spi_txrx_pio(spi, cs,
 						(void *)t->tx_buf,
-						t->len, &first, &last, 1);
+						t->len, flag | TXRX_WRITE);
 			if (t->rx_buf)
 				status = mxs_spi_txrx_pio(spi, cs,
 						t->rx_buf, t->len,
-						&first, &last, 0);
+						flag);
 		} else {
 			writel(BM_SSP_CTRL1_DMA_ENABLE,
 				ssp->base + HW_SSP_CTRL1(ssp) +
@@ -468,11 +455,11 @@  static int mxs_spi_transfer_one(struct spi_master *master,
 			if (t->tx_buf)
 				status = mxs_spi_txrx_dma(spi, cs,
 						(void *)t->tx_buf, t->len,
-						&first, &last, 1);
+						flag | TXRX_WRITE);
 			if (t->rx_buf)
 				status = mxs_spi_txrx_dma(spi, cs,
 						t->rx_buf, t->len,
-						&first, &last, 0);
+						flag);
 		}
 
 		if (status) {
@@ -481,7 +468,6 @@  static int mxs_spi_transfer_one(struct spi_master *master,
 		}
 
 		m->actual_length += t->len;
-		first = last = 0;
 	}
 
 	m->status = status;