[v2,3/5] dmaengine: pxa: add pxa dmaengine driver

Message ID: 1428781236-25806-4-git-send-email-robert.jarzmik@free.fr (mailing list archive)
State: Changes Requested

Commit Message

Robert Jarzmik April 11, 2015, 7:40 p.m. UTC
This is a new driver for pxa SoCs, which is also compatible with the former
mmp_pdma.

The rationale behind a new driver (as opposed to incremental patching) was:

 - the new driver relies on virt-dma, which obsoletes all the internal
   structures of mmp_pdma (sw_desc, hw_desc, ...), and consequently all the
   functions

 - mmp_pdma allocates dma coherent descriptors containing not only hardware
   descriptors but also linked-list information
   The new driver only puts the dma hardware descriptors (i.e. 4 u32) into the
   dma pool allocated memory. This completely changes the way descriptors are
   handled

 - the architecture behind the interrupt/tasklet management was rewritten to
   conform better to virt-dma

 - the buffer alignment is handled differently
   The former driver assumed that the DMA channel stopped between each
   descriptor. The new one chains descriptors to keep the channel running. This
   is a necessary guarantee for real-time, high-bandwidth use cases such as
   video capture on "old" architectures such as pxa.

 - hot chaining / cold chaining / no chaining
   Whenever possible, submitting a descriptor "hot chains" it to a running
   channel. There is still no guarantee that the descriptor will be issued, as
   the channel might be stopped just before the descriptor is submitted. Yet
   this allows several video buffers to be submitted, and a buffer to be
   resubmitted while another one is being handled.
   As before, dma_async_issue_pending() is the only guarantee that all the
   buffers are issued.
   When an alignment issue is detected (i.e. one address in a descriptor is not
   a multiple of 8) and the already running channel is in "aligned mode", the
   channel is stopped and restarted in "misaligned mode" to finish the issued
   list.

 - descriptor reuse
   A submitted, issued and completed descriptor can be reused, i.e. resubmitted,
   if it was prepared with the proper flag (DMA_PREP_ACK). In this case, only a
   release of the channel resources will free that descriptor.
   This allows a rolling ring of buffers to be reused, where several thousand
   hardware descriptors are in use (video buffers for example).

Additionally, a set of smaller features is introduced:
 - debugging traces
 - a lockless way to know whether a descriptor is terminated

The driver was tested on the zylonite board (pxa3xx) and the mioa701 (pxa27x),
with dmatest, pxa_camera and pxamci.

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
---
Since v1: 1) fixed muxed interrupt requesting
---
 drivers/dma/Kconfig         |   11 +
 drivers/dma/Makefile        |    1 +
 drivers/dma/pxa_dma.c       | 1211 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/dma/pxa-dma.h |   27 +
 4 files changed, 1250 insertions(+)
 create mode 100644 drivers/dma/pxa_dma.c
 create mode 100644 include/linux/dma/pxa-dma.h

Comments

Vinod Koul May 8, 2015, 6:34 a.m. UTC | #1
On Sat, Apr 11, 2015 at 09:40:34PM +0200, Robert Jarzmik wrote:
> This is a new driver for pxa SoCs, which is also compatible with the former
> mmp_pdma.
The rationale is fine, is there a plan to remove old mmp_pdma then?

> +config PXA_DMA
> +	bool "PXA DMA support"
no prompt?

> +
> +#define DRCMR(n)	((((n) < 64) ? 0x0100 : 0x1100) + (((n) & 0x3f) << 2))
care to put a comment on this calculation

> +#define DRCMR_MAPVLD	BIT(7)	/* Map Valid (read / write) */
> +#define DRCMR_CHLNUM	0x1f	/* mask for Channel Number (read / write) */
> +
> +#define DDADR_DESCADDR	0xfffffff0	/* Address of next descriptor (mask) */
> +#define DDADR_STOP	BIT(0)	/* Stop (read / write) */
> +
> +#define DCMD_INCSRCADDR	BIT(31)	/* Source Address Increment Setting. */
> +#define DCMD_INCTRGADDR	BIT(30)	/* Target Address Increment Setting. */
> +#define DCMD_FLOWSRC	BIT(29)	/* Flow Control by the source. */
> +#define DCMD_FLOWTRG	BIT(28)	/* Flow Control by the target. */
> +#define DCMD_STARTIRQEN	BIT(22)	/* Start Interrupt Enable */
> +#define DCMD_ENDIRQEN	BIT(21)	/* End Interrupt Enable */
> +#define DCMD_ENDIAN	BIT(18)	/* Device Endian-ness. */
> +#define DCMD_BURST8	(1 << 16)	/* 8 byte burst */
> +#define DCMD_BURST16	(2 << 16)	/* 16 byte burst */
> +#define DCMD_BURST32	(3 << 16)	/* 32 byte burst */
> +#define DCMD_WIDTH1	(1 << 14)	/* 1 byte width */
> +#define DCMD_WIDTH2	(2 << 14)	/* 2 byte width (HalfWord) */
> +#define DCMD_WIDTH4	(3 << 14)	/* 4 byte width (Word) */
> +#define DCMD_LENGTH	0x01fff		/* length mask (max = 8K - 1) */
Please namespace these ...

> +#define tx_to_pxad_desc(tx)					\
> +	container_of(tx, struct pxad_desc_sw, async_tx)
> +#define to_pxad_chan(dchan)					\
> +	container_of(dchan, struct pxad_chan, vc.chan)
> +#define to_pxad_dev(dmadev)					\
> +	container_of(dmadev, struct pxad_device, slave)
> +#define to_pxad_sw_desc(_vd)				\
> +	container_of((_vd), struct pxad_desc_sw, vd)
> +
> +#define pdma_err(pdma, fmt, arg...) \
> +	dev_err(pdma->slave.dev, "%s: " fmt, __func__, ## arg)
> +#define chan_dbg(_chan, fmt, arg...)					\
> +	dev_dbg(&(_chan)->vc.chan.dev->device, "%s(chan=%p): " fmt,	\
> +		__func__, (_chan), ## arg)
> +#define chan_vdbg(_chan, fmt, arg...)					\
> +	dev_vdbg(&(_chan)->vc.chan.dev->device, "%s(chan=%p): " fmt,	\
> +		__func__, (_chan), ## arg)
> +#define chan_warn(_chan, fmt, arg...)					\
> +	dev_warn(&(_chan)->vc.chan.dev->device, "%s(chan=%p): " fmt,	\
> +		 __func__, (_chan), ## arg)
> +#define chan_err(_chan, fmt, arg...)					\
> +	dev_err(&(_chan)->vc.chan.dev->device, "%s(chan=%p): " fmt,	\
> +		__func__, (_chan), ## arg)
am not a big fan of driver specific debug macros, can we use dev_ ones please

> +
> +#define _phy_readl_relaxed(phy, _reg)					\
> +	readl_relaxed((phy)->base + _reg((phy)->idx))
> +#define phy_readl_relaxed(phy, _reg)					\
> +	({								\
> +		u32 _v;							\
> +		_v = readl_relaxed((phy)->base + _reg((phy)->idx));	\
> +		chan_vdbg(phy->vchan, "readl(%s): 0x%08x\n", #_reg,	\
> +			  _v);						\
> +		_v;							\
> +	})
> +#define phy_writel(phy, val, _reg)					\
> +	do {								\
> +		writel((val), (phy)->base + _reg((phy)->idx));		\
> +		chan_vdbg((phy)->vchan, "writel(0x%08x, %s)\n",		\
> +			  (u32)(val), #_reg);				\
> +	} while (0)
> +#define phy_writel_relaxed(phy, val, _reg)				\
> +	do {								\
> +		writel_relaxed((val), (phy)->base + _reg((phy)->idx));	\
> +		chan_vdbg((phy)->vchan, "writel(0x%08x, %s)\n",		\
> +			  (u32)(val), #_reg);				\
> +	} while (0)
> +
> +/*
??
Does this code compile?

> +
> +static struct pxad_phy *lookup_phy(struct pxad_chan *pchan)
> +{
> +	int prio, i;
> +	struct pxad_device *pdev = to_pxad_dev(pchan->vc.chan.device);
> +	struct pxad_phy *phy, *found = NULL;
> +	unsigned long flags;
> +
> +	/*
> +	 * dma channel priorities
> +	 * ch 0 - 3,  16 - 19  <--> (0)
> +	 * ch 4 - 7,  20 - 23  <--> (1)
> +	 * ch 8 - 11, 24 - 27  <--> (2)
> +	 * ch 12 - 15, 28 - 31  <--> (3)
> +	 */
> +
> +	spin_lock_irqsave(&pdev->phy_lock, flags);
> +	for (prio = pchan->prio; prio >= PXAD_PRIO_HIGHEST; prio--) {
> +		for (i = 0; i < pdev->nr_chans; i++) {
> +			if (prio != (i & 0xf) >> 2)
> +				continue;
> +			phy = &pdev->phys[i];
> +			if (!phy->vchan) {
> +				phy->vchan = pchan;
> +				found = phy;
> +				goto out_unlock;
what does phy have to do with priority here?

> +static bool pxad_try_hotchain(struct virt_dma_chan *vc,
> +				  struct virt_dma_desc *vd)
> +{
> +	struct virt_dma_desc *vd_last_issued = NULL;
> +	struct pxad_chan *chan = to_pxad_chan(&vc->chan);
> +
> +	/*
> +	 * Attempt to hot chain the tx if the phy is still running. This is
> +	 * considered successful only if either the channel is still running
> +	 * after the chaining, or if the chained transfer is completed after
> +	 * having been hot chained.
> +	 * A change of alignment is not allowed, and forbids hotchaining.
> +	 */
okay, so what if the first txn completes while you are hotchaining? How do
we prevent these sorts of races with the HW?


> +static struct pxad_desc_sw *
> +pxad_alloc_desc(struct pxad_chan *chan, unsigned int nb_hw_desc)
> +{
> +	struct pxad_desc_sw *sw_desc;
> +	dma_addr_t dma;
> +	int i;
> +
> +	sw_desc = kzalloc(sizeof(*sw_desc) +
> +			  nb_hw_desc * sizeof(struct pxad_desc_hw *),
> +			  GFP_ATOMIC);
> +	if (!sw_desc) {
> +		chan_err(chan, "Couldn't allocate a sw_desc\n");
this is not required, memory allocator will spew this as well. I think
checkpatch should have warned you..

> +static inline struct dma_async_tx_descriptor *
> +pxad_tx_prep(struct virt_dma_chan *vc, struct virt_dma_desc *vd,
> +		 unsigned long tx_flags)
> +{
> +	struct dma_async_tx_descriptor *tx;
> +
> +	tx = vchan_tx_prep(vc, vd, tx_flags);
> +	tx->tx_submit = pxad_tx_submit;
> +	tx->tx_release = pxad_tx_release;
tx_release?


> +static int pxad_config(struct dma_chan *dchan,
> +		       struct dma_slave_config *cfg)
> +{
> +	struct pxad_chan *chan = to_pxad_chan(dchan);
> +	u32 maxburst = 0, dev_addr = 0;
> +	enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED;
> +
> +	if (!dchan)
> +		return -EINVAL;
> +
> +	chan->dir = cfg->direction;
> +	chan->dcmd_base = 0;
> +
> +	if (cfg->direction == DMA_DEV_TO_MEM) {
direction is deprecated, please copy the parameters and then use them in
your prep_ based on the direction passed


> +static unsigned int pxad_residue(struct pxad_chan *chan,
> +				 dma_cookie_t cookie)
> +{
> +	struct virt_dma_desc *vd = NULL;
> +	struct pxad_desc_sw *sw_desc = NULL;
> +	struct pxad_desc_hw *hw_desc = NULL;
> +	u32 curr, start, len, end, residue = 0;
> +	unsigned long flags;
> +	bool passed = false, prev_completed = true;
> +	int i;
> +
> +	/*
> +	 * If the channel does not have a phy pointer anymore, it has already
> +	 * been completed. Therefore, its residue is 0.
> +	 */
> +	if (!chan->phy)
> +		return 0;
> +
> +	if (chan->dir == DMA_DEV_TO_MEM)
> +		curr = phy_readl_relaxed(chan->phy, DTADR);
> +	else
> +		curr = phy_readl_relaxed(chan->phy, DSADR);
> +
> +	spin_lock_irqsave(&chan->vc.lock, flags);
> +
> +	list_for_each_entry(vd, &chan->vc.desc_issued, node) {
> +		sw_desc = to_pxad_sw_desc(vd);
> +
> +		if (vd->tx.cookie == cookie && !prev_completed) {
> +			residue = sw_desc->len;
> +			break;
> +		}
> +		prev_completed = is_desc_completed(vd);
why not use vchan_find_desc() ?

> +
> +		if (vd->tx.cookie != cookie)
> +			continue;
> +
> +		for (i = 0; i < sw_desc->nb_desc - 1; i++) {
> +			hw_desc = sw_desc->hw_desc[i];
> +			if (chan->dir == DMA_DEV_TO_MEM)
> +				start = hw_desc->dtadr;
> +			else
> +				start = hw_desc->dsadr;
> +			len = hw_desc->dcmd & DCMD_LENGTH;
> +			end = start + len;
> +
> +			/*
> +			 * 'passed' will be latched once we found the descriptor
> +			 * which lies inside the boundaries of the curr
> +			 * pointer. All descriptors that occur in the list
> +			 * _after_ we found that partially handled descriptor
> +			 * are still to be processed and are hence added to the
> +			 * residual bytes counter.
> +			 */
> +
> +			if (passed) {
> +				residue += len;
> +			} else if (curr >= start && curr <= end) {
> +				residue += end - curr;
> +				passed = true;
> +			}
> +		}
> +
> +		break;
> +	}
> +
> +	spin_unlock_irqrestore(&chan->vc.lock, flags);
> +	chan_dbg(chan, "txd %p[%x] sw_desc=%p: %d\n",
> +		 vd, cookie, sw_desc, residue);
> +	return residue;
> +}
> +
> +static enum dma_status pxad_tx_status(struct dma_chan *dchan,
> +				      dma_cookie_t cookie,
> +				      struct dma_tx_state *txstate)
> +{
> +	struct pxad_chan *chan = to_pxad_chan(dchan);
> +	enum dma_status ret;
> +
> +	ret = dma_cookie_status(dchan, cookie, txstate);
pls check if txstate is valid 

> +	if (likely(ret != DMA_ERROR))
> +		dma_set_residue(txstate, pxad_residue(chan, cookie));
> +
> +	return ret;
> +}
> +
> +static void pxad_free_channels(struct dma_device *dmadev)
> +{
> +	struct pxad_chan *c, *cn;
> +
> +	list_for_each_entry_safe(c, cn, &dmadev->channels,
> +				 vc.chan.device_node) {
> +		list_del(&c->vc.chan.device_node);
> +		tasklet_kill(&c->vc.task);
> +	}
> +}
> +
> +static int pxad_remove(struct platform_device *op)
> +{
> +	struct pxad_device *pdev = platform_get_drvdata(op);
> +
you should free up irq as well, otherwise device can still generate
interrupts

Thanks
Robert Jarzmik May 8, 2015, 12:28 p.m. UTC | #2
Vinod Koul <vinod.koul@intel.com> writes:

Hi Vinod,

First thanks for the review.

>> This is a new driver for pxa SoCs, which is also compatible with the former
>> mmp_pdma.
> The rationale is fine, is there a plan to remove old mmp_pdma then?
I have this in mind, yes. I will need a test from the MMP people to confirm that
pxa-dma works flawlessly with the MMP architecture, with the same level of
stability. Once I have their blessing, I'll propose to remove mmp_pdma.

>> +config PXA_DMA
>> +	bool "PXA DMA support"
> no prompt?
Right, this is missing.  I first thought this would always be selected by
CONFIG_ARCH_PXA, and no user selection would be allowed. But I changed my mind
since then, and I'll add a prompt description there. For v3.

>> +#define DRCMR(n)	((((n) < 64) ? 0x0100 : 0x1100) + (((n) & 0x3f) << 2))
> care to put a comment on this calculation
Definitely. For v3.
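Something along these lines would capture it (a sketch only, derived from what
the macro already computes; the exact wording is of course open):

/*
 * DRCMR(n): offset of the request-to-channel map register for DMA
 * requestor n. Requestors 0..63 sit in a first bank at 0x0100, and
 * requestors 64..127 in a second bank at 0x1100, with one 32-bit
 * register (4 bytes) per requestor.
 */
#define DRCMR(n)	((((n) < 64) ? 0x0100 : 0x1100) + (((n) & 0x3f) << 2))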

>> +#define DRCMR_MAPVLD	BIT(7)	/* Map Valid (read / write) */
>> +#define DRCMR_CHLNUM	0x1f	/* mask for Channel Number (read / write) */
>> +
>> +#define DDADR_DESCADDR	0xfffffff0	/* Address of next descriptor (mask) */
>> +#define DDADR_STOP	BIT(0)	/* Stop (read / write) */
>> +
>> +#define DCMD_INCSRCADDR	BIT(31)	/* Source Address Increment Setting. */
>> +#define DCMD_INCTRGADDR	BIT(30)	/* Target Address Increment Setting. */
>> +#define DCMD_FLOWSRC	BIT(29)	/* Flow Control by the source. */
>> +#define DCMD_FLOWTRG	BIT(28)	/* Flow Control by the target. */
>> +#define DCMD_STARTIRQEN	BIT(22)	/* Start Interrupt Enable */
>> +#define DCMD_ENDIRQEN	BIT(21)	/* End Interrupt Enable */
>> +#define DCMD_ENDIAN	BIT(18)	/* Device Endian-ness. */
>> +#define DCMD_BURST8	(1 << 16)	/* 8 byte burst */
>> +#define DCMD_BURST16	(2 << 16)	/* 16 byte burst */
>> +#define DCMD_BURST32	(3 << 16)	/* 32 byte burst */
>> +#define DCMD_WIDTH1	(1 << 14)	/* 1 byte width */
>> +#define DCMD_WIDTH2	(2 << 14)	/* 2 byte width (HalfWord) */
>> +#define DCMD_WIDTH4	(3 << 14)	/* 4 byte width (Word) */
>> +#define DCMD_LENGTH	0x01fff		/* length mask (max = 8K - 1) */
> Please namespace these ...
Sorry, I don't get this comment, would you care to explain it to me please?

>> +#define chan_dbg(_chan, fmt, arg...)					\
>> +	dev_dbg(&(_chan)->vc.chan.dev->device, "%s(chan=%p): " fmt,	\
>> +		__func__, (_chan), ## arg)
...
> am not a big fan of driver specific debug macros, can we use dev_ ones please
As you wish.  This will make the code ugly for all the chan_dbg() calls, which
is a bit of a pity, but if you think this should be done that way then let's do
that.
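To illustrate the difference, this is what one trace looks like today and what
it becomes when open coded (a sketch, simply expanding the existing chan_dbg()
macro by hand):

	/* with the driver-specific macro */
	chan_dbg(chan, "desc=%p\n", desc);

	/* open coded with the generic dev_dbg() helper */
	dev_dbg(&chan->vc.chan.dev->device, "%s(chan=%p): desc=%p\n",
		__func__, chan, desc);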

>> +#define _phy_readl_relaxed(phy, _reg)					\
>> +	readl_relaxed((phy)->base + _reg((phy)->idx))
>> +#define phy_readl_relaxed(phy, _reg)					\
>> +	({								\
>> +		u32 _v;							\
>> +		_v = readl_relaxed((phy)->base + _reg((phy)->idx));	\
>> +		chan_vdbg(phy->vchan, "readl(%s): 0x%08x\n", #_reg,	\
>> +			  _v);						\
>> +		_v;							\
>> +	})
>> +#define phy_writel(phy, val, _reg)					\
>> +	do {								\
>> +		writel((val), (phy)->base + _reg((phy)->idx));		\
>> +		chan_vdbg((phy)->vchan, "writel(0x%08x, %s)\n",		\
>> +			  (u32)(val), #_reg);				\
>> +	} while (0)
>> +#define phy_writel_relaxed(phy, val, _reg)				\
>> +	do {								\
>> +		writel_relaxed((val), (phy)->base + _reg((phy)->idx));	\
>> +		chan_vdbg((phy)->vchan, "writel(0x%08x, %s)\n",		\
>> +			  (u32)(val), #_reg);				\
>> +	} while (0)
>> +
>> +/*
> ??
> Does this code compile?
Oh yes, it compiles and works with both debug on and debug off. This is actually
the most handy debug trace of this driver. I've been using this kind of
accessor for years in my drivers, and this is truly something I need to
maintain the driver, especially when a nasty corner case happens on hardware I
don't own.

And I've tested this thoroughly when I was stabilizing this driver.

>> +	/*
>> +	 * dma channel priorities
>> +	 * ch 0 - 3,  16 - 19  <--> (0)
>> +	 * ch 4 - 7,  20 - 23  <--> (1)
>> +	 * ch 8 - 11, 24 - 27  <--> (2)
>> +	 * ch 12 - 15, 28 - 31  <--> (3)
>> +	 */
>> +
>> +	spin_lock_irqsave(&pdev->phy_lock, flags);
>> +	for (prio = pchan->prio; prio >= PXAD_PRIO_HIGHEST; prio--) {
>> +		for (i = 0; i < pdev->nr_chans; i++) {
>> +			if (prio != (i & 0xf) >> 2)
>> +				continue;
>> +			phy = &pdev->phys[i];
>> +			if (!phy->vchan) {
>> +				phy->vchan = pchan;
>> +				found = phy;
>> +				goto out_unlock;
> what does phy have to do with priority here?
Each phy has a priority, it's part of the hardware.  IOW each phy has a "granted
bandwidth". This guarantee is based on the number of requests on the system bus
the DMA IP can place.

In PXA2xx/PXA3xx, the DMA IP can have 8 simultaneous requests. The highest
priority phys always get 4 slots, the high ones 2 slots, etc ...

So a priority is an intrinsic property of a phy.

>
>> +static bool pxad_try_hotchain(struct virt_dma_chan *vc,
>> +				  struct virt_dma_desc *vd)
>> +{
>> +	struct virt_dma_desc *vd_last_issued = NULL;
>> +	struct pxad_chan *chan = to_pxad_chan(&vc->chan);
>> +
>> +	/*
>> +	 * Attempt to hot chain the tx if the phy is still running. This is
>> +	 * considered successful only if either the channel is still running
>> +	 * after the chaining, or if the chained transfer is completed after
>> +	 * having been hot chained.
>> +	 * A change of alignment is not allowed, and forbids hotchaining.
>> +	 */
> okay, so what if the first txn completes while you are hotchaining? How do
> we prevent these sorts of races with the HW?

In the case where hotchaining is attempted and the txn completes while the
chaining happens, the channel obviously stops, and the new descriptor is not
"hotchained". This is the reason the function is called "...try_hotchain" and
not "hotchain".

There is actually no race:
- either you succeed the hotchain, i.e. the atomic write of the dma link
  descriptor happens before the txn finishes
- or you fail the hotchain, in which case the channel stops without beginning
  the new chained descriptor, i.e. the atomic write of the dma link descriptor
  happens after the txn finishes

It's a "try and test after" approach, and the "try" is atomic because it's a
single u32 write to link the descriptors (which is materialized in
pxad_desc_chain()).
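Condensed to its essence, the sequence is the following (a sketch built from the
patch's helpers; vd_last_issued and vd_new stand for the last issued and the
newly submitted virtual descriptors):

	struct pxad_desc_sw *last = to_pxad_sw_desc(vd_last_issued);

	/* the "try": a single u32 store linking the new transfer after the
	 * last issued one (this is what pxad_desc_chain() does) */
	last->hw_desc[last->nb_desc - 1]->ddadr = to_pxad_sw_desc(vd_new)->first;

	/* the "test after": the hot chain is deemed successful only if the
	 * channel is still running after the link write, or if the last
	 * issued descriptor is seen as completed; otherwise the caller falls
	 * back to the submitted queue and the irq handler relaunches the
	 * channel */
	if (is_chan_running(chan) || is_desc_completed(vd_last_issued))
		return true;
	return false;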

>> +static struct pxad_desc_sw *
>> +pxad_alloc_desc(struct pxad_chan *chan, unsigned int nb_hw_desc)
>> +{
>> +	struct pxad_desc_sw *sw_desc;
>> +	dma_addr_t dma;
>> +	int i;
>> +
>> +	sw_desc = kzalloc(sizeof(*sw_desc) +
>> +			  nb_hw_desc * sizeof(struct pxad_desc_hw *),
>> +			  GFP_ATOMIC);
>> +	if (!sw_desc) {
>> +		chan_err(chan, "Couldn't allocate a sw_desc\n");
> this is not required, memory allocator will spew this as well. I think
> checkpatch should have warned you..
Checkpatch did not, but I agree, I will remove this print altogether. For v3.
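The allocation then simply becomes (a sketch of what dropping the print leaves
behind):

	sw_desc = kzalloc(sizeof(*sw_desc) +
			  nb_hw_desc * sizeof(struct pxad_desc_hw *),
			  GFP_ATOMIC);
	if (!sw_desc)
		return NULL;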

>> +static inline struct dma_async_tx_descriptor *
>> +pxad_tx_prep(struct virt_dma_chan *vc, struct virt_dma_desc *vd,
>> +		 unsigned long tx_flags)
>> +{
>> +	struct dma_async_tx_descriptor *tx;
>> +
>> +	tx = vchan_tx_prep(vc, vd, tx_flags);
>> +	tx->tx_submit = pxad_tx_submit;
>> +	tx->tx_release = pxad_tx_release;
> tx_release?
Oh sorry, this is a leftover from our former discussion on descriptor
reuse. You've convinced me since then to use async_tx_test_ack() in virt-dma.c,
so I will remove that. For v3.

>> +static int pxad_config(struct dma_chan *dchan,
>> +		       struct dma_slave_config *cfg)
>> +{
>> +	struct pxad_chan *chan = to_pxad_chan(dchan);
>> +	u32 maxburst = 0, dev_addr = 0;
>> +	enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED;
>> +
>> +	if (!dchan)
>> +		return -EINVAL;
>> +
>> +	chan->dir = cfg->direction;
>> +	chan->dcmd_base = 0;
>> +
>> +	if (cfg->direction == DMA_DEV_TO_MEM) {
> direction is deprecated, please copy the parameters and then use them in
> your prep_ based on the direction passed
Understood. For v3.
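One possible shape for that: pxad_config() would just store the whole
dma_slave_config in the channel (say a new chan->cfg member, my assumption), and
the prep_ callbacks would then pick the relevant half from it based on the
direction they are given, along these lines (sketch):

	/* in the prep_ callbacks, dir is the direction passed by the client */
	if (dir == DMA_DEV_TO_MEM) {
		maxburst = chan->cfg.src_maxburst;
		width    = chan->cfg.src_addr_width;
		dev_addr = chan->cfg.src_addr;
	} else {	/* DMA_MEM_TO_DEV */
		maxburst = chan->cfg.dst_maxburst;
		width    = chan->cfg.dst_addr_width;
		dev_addr = chan->cfg.dst_addr;
	}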

>> +	spin_lock_irqsave(&chan->vc.lock, flags);
>> +
>> +	list_for_each_entry(vd, &chan->vc.desc_issued, node) {
>> +		sw_desc = to_pxad_sw_desc(vd);
>> +
>> +		if (vd->tx.cookie == cookie && !prev_completed) {
>> +			residue = sw_desc->len;
>> +			break;
>> +		}
>> +		prev_completed = is_desc_completed(vd);
> why not use vchan_find_desc() ?
Indeed, why not :) I did not have this function in mind. I'll use it
for v3.

>> +static enum dma_status pxad_tx_status(struct dma_chan *dchan,
>> +				      dma_cookie_t cookie,
>> +				      struct dma_tx_state *txstate)
>> +{
>> +	struct pxad_chan *chan = to_pxad_chan(dchan);
>> +	enum dma_status ret;
>> +
>> +	ret = dma_cookie_status(dchan, cookie, txstate);
> pls check if txstate is valid
My understanding is that's already the job of dma_cookie_status() and
dma_set_residue().

>> +static void pxad_free_channels(struct dma_device *dmadev)
>> +{
>> +	struct pxad_chan *c, *cn;
>> +
>> +	list_for_each_entry_safe(c, cn, &dmadev->channels,
>> +				 vc.chan.device_node) {
>> +		list_del(&c->vc.chan.device_node);
>> +		tasklet_kill(&c->vc.task);
>> +	}
>> +}
>> +
>> +static int pxad_remove(struct platform_device *op)
>> +{
>> +	struct pxad_device *pdev = platform_get_drvdata(op);
>> +
> you should free up irq as well, otherwise device can still generate
> interrupts
They are freed automatically because I used devm_request_irq() and not
request_irq().

Cheers.
Vinod Koul May 9, 2015, 11:59 a.m. UTC | #3
On Fri, May 08, 2015 at 02:28:37PM +0200, Robert Jarzmik wrote:
> Vinod Koul <vinod.koul@intel.com> writes:
> 
> 
> >> +#define DRCMR_MAPVLD	BIT(7)	/* Map Valid (read / write) */
> >> +#define DRCMR_CHLNUM	0x1f	/* mask for Channel Number (read / write) */
> >> +
> >> +#define DDADR_DESCADDR	0xfffffff0	/* Address of next descriptor (mask) */
> >> +#define DDADR_STOP	BIT(0)	/* Stop (read / write) */
> >> +
> >> +#define DCMD_INCSRCADDR	BIT(31)	/* Source Address Increment Setting. */
> >> +#define DCMD_INCTRGADDR	BIT(30)	/* Target Address Increment Setting. */
> >> +#define DCMD_FLOWSRC	BIT(29)	/* Flow Control by the source. */
> >> +#define DCMD_FLOWTRG	BIT(28)	/* Flow Control by the target. */
> >> +#define DCMD_STARTIRQEN	BIT(22)	/* Start Interrupt Enable */
> >> +#define DCMD_ENDIRQEN	BIT(21)	/* End Interrupt Enable */
> >> +#define DCMD_ENDIAN	BIT(18)	/* Device Endian-ness. */
> >> +#define DCMD_BURST8	(1 << 16)	/* 8 byte burst */
> >> +#define DCMD_BURST16	(2 << 16)	/* 16 byte burst */
> >> +#define DCMD_BURST32	(3 << 16)	/* 32 byte burst */
> >> +#define DCMD_WIDTH1	(1 << 14)	/* 1 byte width */
> >> +#define DCMD_WIDTH2	(2 << 14)	/* 2 byte width (HalfWord) */
> >> +#define DCMD_WIDTH4	(3 << 14)	/* 4 byte width (Word) */
> >> +#define DCMD_LENGTH	0x01fff		/* length mask (max = 8K - 1) */
> > Please namespace these ...
> Sorry, I don't get this comment, would you care to explain it to me please?
Right now you are using very generic names which can conflict with others, so it
makes sense to add PXA_DCMD_LENGTH, which is less prone to conflicts, rather
than DCMD_LENGTH
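So for instance (sketch of the requested rename, same values as today):

#define PXA_DCSR_RUN		BIT(31)		/* Run Bit (read / write) */
#define PXA_DCMD_BURST32	(3 << 16)	/* 32 byte burst */
#define PXA_DCMD_LENGTH		0x01fff		/* length mask (max = 8K - 1) */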

> 
> >> +#define _phy_readl_relaxed(phy, _reg)					\
> >> +	readl_relaxed((phy)->base + _reg((phy)->idx))
> >> +#define phy_readl_relaxed(phy, _reg)					\
> >> +	({								\
> >> +		u32 _v;							\
> >> +		_v = readl_relaxed((phy)->base + _reg((phy)->idx));	\
> >> +		chan_vdbg(phy->vchan, "readl(%s): 0x%08x\n", #_reg,	\
> >> +			  _v);						\
> >> +		_v;							\
> >> +	})
> >> +#define phy_writel(phy, val, _reg)					\
> >> +	do {								\
> >> +		writel((val), (phy)->base + _reg((phy)->idx));		\
> >> +		chan_vdbg((phy)->vchan, "writel(0x%08x, %s)\n",		\
> >> +			  (u32)(val), #_reg);				\
> >> +	} while (0)
> >> +#define phy_writel_relaxed(phy, val, _reg)				\
> >> +	do {								\
> >> +		writel_relaxed((val), (phy)->base + _reg((phy)->idx));	\
> >> +		chan_vdbg((phy)->vchan, "writel(0x%08x, %s)\n",		\
> >> +			  (u32)(val), #_reg);				\
> >> +	} while (0)
> >> +
> >> +/*
> > ??
> > Does this code compile?
> Oh yes, it compiles and works with both debug on and debug off. This is actually
> the most handy debug trace of this driver. I've been using this kind of
> accessor for years in my drivers, and this is truly something I need to
> maintain the driver, especially when a nasty corner case happens on hardware I
> don't own.
You have the start of a comment on that line and no ending, so how does it compile?

> >> +	spin_lock_irqsave(&pdev->phy_lock, flags);
> >> +	for (prio = pchan->prio; prio >= PXAD_PRIO_HIGHEST; prio--) {
> >> +		for (i = 0; i < pdev->nr_chans; i++) {
> >> +			if (prio != (i & 0xf) >> 2)
> >> +				continue;
> >> +			phy = &pdev->phys[i];
> >> +			if (!phy->vchan) {
> >> +				phy->vchan = pchan;
> >> +				found = phy;
> >> +				goto out_unlock;
> > what does phy have to do with priority here?
> Each phy has a priority, it's part of the hardware.  IOW each phy has a "granted
> bandwidth". This guarantee is based on the number of requests on the system bus
> the DMA IP can place.
> 
> In PXA2xx/PXA3xx, the DMA IP can have 8 simultaneous requests. The highest
> priority phys always get 4 slots, the high ones 2 slots, etc ...
> 
> So a priority is an intrinsic property of a phy.
Yes that part is ok, but why are you looking up the priority while searching for
a phy? Searching through the number of channels should suffice.

> >> +static struct pxad_desc_sw *
> >> +pxad_alloc_desc(struct pxad_chan *chan, unsigned int nb_hw_desc)
> >> +{
> >> +	struct pxad_desc_sw *sw_desc;
> >> +	dma_addr_t dma;
> >> +	int i;
> >> +
> >> +	sw_desc = kzalloc(sizeof(*sw_desc) +
> >> +			  nb_hw_desc * sizeof(struct pxad_desc_hw *),
> >> +			  GFP_ATOMIC);
> >> +	if (!sw_desc) {
> >> +		chan_err(chan, "Couldn't allocate a sw_desc\n");
> > this is not required, memory allocator will spew this as well. I think
> > checkpatch should have warned you..
> Checkpatch did not, but I agree, I will remove this print altogether. For v3.
Surprised, as it actually does have such a check; see:
# check for unnecessary "Out of Memory" messages

> 
> >> +static enum dma_status pxad_tx_status(struct dma_chan *dchan,
> >> +				      dma_cookie_t cookie,
> >> +				      struct dma_tx_state *txstate)
> >> +{
> >> +	struct pxad_chan *chan = to_pxad_chan(dchan);
> >> +	enum dma_status ret;
> >> +
> >> +	ret = dma_cookie_status(dchan, cookie, txstate);
> > pls check if txstate is valid
> My understanding is that's already the job of dma_cookie_status() and
> dma_set_residue().
Yes it is, but if txstate is NULL then there is no need to calculate the
residue, so just bail out here
Robert Jarzmik May 9, 2015, 5 p.m. UTC | #4
Vinod Koul <vinod.koul@intel.com> writes:

> On Fri, May 08, 2015 at 02:28:37PM +0200, Robert Jarzmik wrote:
>> Sorry, I don't get this comment, would you care to explain it to me please?
> Right now you are using very generic names which can conflict with others, so it
> makes sense to add PXA_DCMD_LENGTH, which is less prone to conflicts, rather
> than DCMD_LENGTH
OK, got it, for v3.

>> >> +	} while (0)
>> >> +
>> >> +/*
>> > ??
>> > Does this code compile?
Ah, I see what happens. Patch 4/5 adds:
+ * Debug fs
+ */

And fixes this. Actually this line belongs to patch 4/5, not 3/5. That's why
I didn't see the compilation issue: I compiled with all 5 patches applied. I'll
move that comment opener to patch 4/5, good catch.

I'll also run it through my "bisectability check" automaton to see if other
culprits lie around.

>> >> +	spin_lock_irqsave(&pdev->phy_lock, flags);
>> >> +	for (prio = pchan->prio; prio >= PXAD_PRIO_HIGHEST; prio--) {
>> >> +		for (i = 0; i < pdev->nr_chans; i++) {
>> >> +			if (prio != (i & 0xf) >> 2)
>> >> +				continue;
>> >> +			phy = &pdev->phys[i];
>> >> +			if (!phy->vchan) {
>> >> +				phy->vchan = pchan;
>> >> +				found = phy;
>> >> +				goto out_unlock;
>> > what does phy have to do with priority here?
>> Each phy has a priority, it's part of the hardware.  IOW each phy has a "granted
>> bandwidth". This guarantee is based on the number of requests on the system bus
>> the DMA IP can place.
>> 
>> In PXA2xx/PXA3xx, the DMA IP can have 8 simultaneous requests. The highest
>> priority phys always get 4 slots, the high ones 2 slots, etc ...
>> 
>> So a priority is an intrinsic property of a phy.
> Yes that part is ok, but why are you looking up the priority while searching for
> a phy? Searching through the number of channels should suffice.
This loop has this as a requirement:
 - if available, find the first free phy of priority prio
 - if not available, find the first free phy of higher priority than prio
 - etc ... up to all priorities

That fulfills the bandwidth requirement, i.e. a channel request of priority prio
ends up mapped on a phy of priority prio or better. Is it any clearer now?
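For reference, the phy-to-priority mapping the loop relies on boils down to this
(a sketch; phy_prio() is just an illustrative helper for the "(i & 0xf) >> 2"
test, with a lower value meaning a higher priority as implied by the
decrementing loop):

/*
 * priority of physical channel i:
 *   ch  0 - 3,  16 - 19  ->  0
 *   ch  4 - 7,  20 - 23  ->  1
 *   ch  8 - 11, 24 - 27  ->  2
 *   ch 12 - 15, 28 - 31  ->  3
 */
static inline int phy_prio(int i)
{
	return (i & 0xf) >> 2;
}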

>> >> +static enum dma_status pxad_tx_status(struct dma_chan *dchan,
>> >> +				      dma_cookie_t cookie,
>> >> +				      struct dma_tx_state *txstate)
>> >> +{
>> >> +	struct pxad_chan *chan = to_pxad_chan(dchan);
>> >> +	enum dma_status ret;
>> >> +
>> >> +	ret = dma_cookie_status(dchan, cookie, txstate);
>> > pls check if txstate is valid
>> My understanding is that's already the job of dma_cookie_status() and
>> dma_set_residue().
> Yes it is, but if txstate is NULL then there is no need to calculate the
> residue, so just bail out here
Ah, got it now, it's an optimization to avoid calling pxad_residue() needlessly. Ok,
I'll add that for v3, good idea.
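A minimal sketch of that early bail-out (not the final v3 code):

static enum dma_status pxad_tx_status(struct dma_chan *dchan,
				      dma_cookie_t cookie,
				      struct dma_tx_state *txstate)
{
	struct pxad_chan *chan = to_pxad_chan(dchan);
	enum dma_status ret;

	ret = dma_cookie_status(dchan, cookie, txstate);
	/* a NULL txstate means the caller doesn't care about the residue,
	 * so skip the locked walk over the issued list entirely */
	if (likely(txstate && ret != DMA_ERROR))
		dma_set_residue(txstate, pxad_residue(chan, cookie));

	return ret;
}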

Patch

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index a874b6e..87804f0 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -162,6 +162,17 @@  config MX3_IPU_IRQS
 	  To avoid bloating the irq_desc[] array we allocate a sufficient
 	  number of IRQ slots and map them dynamically to specific sources.
 
+config PXA_DMA
+	bool "PXA DMA support"
+	depends on (ARCH_MMP || ARCH_PXA)
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support the DMA engine for PXA. It is also compatible with MMP PDMA
+	  platform. The internal DMA IP of all PXA variants is supported, with
+	  16 to 32 channels for peripheral to memory or memory to memory
+	  transfers.
+
 config TXX9_DMAC
 	tristate "Toshiba TXx9 SoC DMA support"
 	depends on MACH_TX49XX || MACH_TX39XX
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index f915f61..07ef9e2 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -25,6 +25,7 @@  obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/
 obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
 obj-$(CONFIG_IMX_DMA) += imx-dma.o
 obj-$(CONFIG_MXS_DMA) += mxs-dma.o
+obj-$(CONFIG_PXA_DMA) += pxa_dma.o
 obj-$(CONFIG_TIMB_DMA) += timb_dma.o
 obj-$(CONFIG_SIRF_DMA) += sirf-dma.o
 obj-$(CONFIG_TI_EDMA) += edma.o
diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c
new file mode 100644
index 0000000..f2064fa
--- /dev/null
+++ b/drivers/dma/pxa_dma.c
@@ -0,0 +1,1211 @@ 
+/*
+ * Copyright 2015 Robert Jarzmik <robert.jarzmik@free.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/platform_data/mmp_dma.h>
+#include <linux/dmapool.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/of.h>
+#include <linux/dma/pxa-dma.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define DCSR(n)		(0x0000 + ((n) << 2))
+#define DALGN(n)	0x00a0
+#define DINT		0x00f0
+#define DDADR(n)	(0x0200 + ((n) << 4))
+#define DSADR(n)	(0x0204 + ((n) << 4))
+#define DTADR(n)	(0x0208 + ((n) << 4))
+#define DCMD(n)		(0x020c + ((n) << 4))
+
+#define DCSR_RUN	BIT(31)	/* Run Bit (read / write) */
+#define DCSR_NODESC	BIT(30)	/* No-Descriptor Fetch (read / write) */
+#define DCSR_STOPIRQEN	BIT(29)	/* Stop Interrupt Enable (read / write) */
+#define DCSR_REQPEND	BIT(8)	/* Request Pending (read-only) */
+#define DCSR_STOPSTATE	BIT(3)	/* Stop State (read-only) */
+#define DCSR_ENDINTR	BIT(2)	/* End Interrupt (read / write) */
+#define DCSR_STARTINTR	BIT(1)	/* Start Interrupt (read / write) */
+#define DCSR_BUSERR	BIT(0)	/* Bus Error Interrupt (read / write) */
+
+#define DCSR_EORIRQEN	BIT(28)	/* End of Receive Interrupt Enable (R/W) */
+#define DCSR_EORJMPEN	BIT(27)	/* Jump to next descriptor on EOR */
+#define DCSR_EORSTOPEN	BIT(26)	/* STOP on an EOR */
+#define DCSR_SETCMPST	BIT(25)	/* Set Descriptor Compare Status */
+#define DCSR_CLRCMPST	BIT(24)	/* Clear Descriptor Compare Status */
+#define DCSR_CMPST	BIT(10)	/* The Descriptor Compare Status */
+#define DCSR_EORINTR	BIT(9)	/* The end of Receive */
+
+#define DRCMR(n)	((((n) < 64) ? 0x0100 : 0x1100) + (((n) & 0x3f) << 2))
+#define DRCMR_MAPVLD	BIT(7)	/* Map Valid (read / write) */
+#define DRCMR_CHLNUM	0x1f	/* mask for Channel Number (read / write) */
+
+#define DDADR_DESCADDR	0xfffffff0	/* Address of next descriptor (mask) */
+#define DDADR_STOP	BIT(0)	/* Stop (read / write) */
+
+#define DCMD_INCSRCADDR	BIT(31)	/* Source Address Increment Setting. */
+#define DCMD_INCTRGADDR	BIT(30)	/* Target Address Increment Setting. */
+#define DCMD_FLOWSRC	BIT(29)	/* Flow Control by the source. */
+#define DCMD_FLOWTRG	BIT(28)	/* Flow Control by the target. */
+#define DCMD_STARTIRQEN	BIT(22)	/* Start Interrupt Enable */
+#define DCMD_ENDIRQEN	BIT(21)	/* End Interrupt Enable */
+#define DCMD_ENDIAN	BIT(18)	/* Device Endian-ness. */
+#define DCMD_BURST8	(1 << 16)	/* 8 byte burst */
+#define DCMD_BURST16	(2 << 16)	/* 16 byte burst */
+#define DCMD_BURST32	(3 << 16)	/* 32 byte burst */
+#define DCMD_WIDTH1	(1 << 14)	/* 1 byte width */
+#define DCMD_WIDTH2	(2 << 14)	/* 2 byte width (HalfWord) */
+#define DCMD_WIDTH4	(3 << 14)	/* 4 byte width (Word) */
+#define DCMD_LENGTH	0x01fff		/* length mask (max = 8K - 1) */
+
+#define PDMA_ALIGNMENT		3
+#define PDMA_MAX_DESC_BYTES	(DCMD_LENGTH & ~((1 << PDMA_ALIGNMENT) - 1))
+
+struct pxad_desc_hw {
+	u32 ddadr;	/* Points to the next descriptor + flags */
+	u32 dsadr;	/* DSADR value for the current transfer */
+	u32 dtadr;	/* DTADR value for the current transfer */
+	u32 dcmd;	/* DCMD value for the current transfer */
+} __aligned(16);
+
+struct pxad_desc_sw {
+	struct virt_dma_desc	vd;		/* Virtual descriptor */
+	int			nb_desc;	/* Number of hw. descriptors */
+	size_t			len;		/* Number of bytes xfered */
+	dma_addr_t		first;		/* First descriptor's addr */
+
+	/* At least one descriptor has an src/dst address not multiple of 8 */
+	bool			misaligned;
+	bool			cyclic;
+	struct dma_pool		*desc_pool;	/* Channel's used allocator */
+
+	struct pxad_desc_hw	*hw_desc[];	/* DMA coherent descriptors */
+};
+
+struct pxad_phy {
+	int			idx;
+	void __iomem		*base;
+	struct pxad_chan	*vchan;
+};
+
+struct pxad_chan {
+	struct virt_dma_chan	vc;		/* Virtual channel */
+	u32			drcmr;		/* Requestor of the channel */
+	enum pxad_chan_prio	prio;		/* Required priority of phy */
+	u32			dcmd_base;	/* Burst sz, dir, bus width */
+	u32			dev_addr;	/* Device transfers address */
+	enum dma_transfer_direction	dir;	/* Transfer direction */
+	/*
+	 * At least one desc_sw in submitted or issued transfers on this channel
+	 * has one address such as: addr % 8 != 0. This implies the DALGN
+	 * setting on the phy.
+	 */
+	bool			misaligned;
+
+	/* protected by vc->lock */
+	struct pxad_phy		*phy;
+	struct dma_pool		*desc_pool;	/* Descriptors pool */
+};
+
+struct pxad_device {
+	struct dma_device		slave;
+	int				nr_chans;
+	void __iomem			*base;
+	struct pxad_phy			*phys;
+	spinlock_t			phy_lock;	/* Phy association */
+};
+
+#define tx_to_pxad_desc(tx)					\
+	container_of(tx, struct pxad_desc_sw, async_tx)
+#define to_pxad_chan(dchan)					\
+	container_of(dchan, struct pxad_chan, vc.chan)
+#define to_pxad_dev(dmadev)					\
+	container_of(dmadev, struct pxad_device, slave)
+#define to_pxad_sw_desc(_vd)				\
+	container_of((_vd), struct pxad_desc_sw, vd)
+
+#define pdma_err(pdma, fmt, arg...) \
+	dev_err(pdma->slave.dev, "%s: " fmt, __func__, ## arg)
+#define chan_dbg(_chan, fmt, arg...)					\
+	dev_dbg(&(_chan)->vc.chan.dev->device, "%s(chan=%p): " fmt,	\
+		__func__, (_chan), ## arg)
+#define chan_vdbg(_chan, fmt, arg...)					\
+	dev_vdbg(&(_chan)->vc.chan.dev->device, "%s(chan=%p): " fmt,	\
+		__func__, (_chan), ## arg)
+#define chan_warn(_chan, fmt, arg...)					\
+	dev_warn(&(_chan)->vc.chan.dev->device, "%s(chan=%p): " fmt,	\
+		 __func__, (_chan), ## arg)
+#define chan_err(_chan, fmt, arg...)					\
+	dev_err(&(_chan)->vc.chan.dev->device, "%s(chan=%p): " fmt,	\
+		__func__, (_chan), ## arg)
+
+#define _phy_readl_relaxed(phy, _reg)					\
+	readl_relaxed((phy)->base + _reg((phy)->idx))
+#define phy_readl_relaxed(phy, _reg)					\
+	({								\
+		u32 _v;							\
+		_v = readl_relaxed((phy)->base + _reg((phy)->idx));	\
+		chan_vdbg(phy->vchan, "readl(%s): 0x%08x\n", #_reg,	\
+			  _v);						\
+		_v;							\
+	})
+#define phy_writel(phy, val, _reg)					\
+	do {								\
+		writel((val), (phy)->base + _reg((phy)->idx));		\
+		chan_vdbg((phy)->vchan, "writel(0x%08x, %s)\n",		\
+			  (u32)(val), #_reg);				\
+	} while (0)
+#define phy_writel_relaxed(phy, val, _reg)				\
+	do {								\
+		writel_relaxed((val), (phy)->base + _reg((phy)->idx));	\
+		chan_vdbg((phy)->vchan, "writel(0x%08x, %s)\n",		\
+			  (u32)(val), #_reg);				\
+	} while (0)
+
+/*
+
+static struct pxad_phy *lookup_phy(struct pxad_chan *pchan)
+{
+	int prio, i;
+	struct pxad_device *pdev = to_pxad_dev(pchan->vc.chan.device);
+	struct pxad_phy *phy, *found = NULL;
+	unsigned long flags;
+
+	/*
+	 * dma channel priorities
+	 * ch 0 - 3,  16 - 19  <--> (0)
+	 * ch 4 - 7,  20 - 23  <--> (1)
+	 * ch 8 - 11, 24 - 27  <--> (2)
+	 * ch 12 - 15, 28 - 31  <--> (3)
+	 */
+
+	spin_lock_irqsave(&pdev->phy_lock, flags);
+	for (prio = pchan->prio; prio >= PXAD_PRIO_HIGHEST; prio--) {
+		for (i = 0; i < pdev->nr_chans; i++) {
+			if (prio != (i & 0xf) >> 2)
+				continue;
+			phy = &pdev->phys[i];
+			if (!phy->vchan) {
+				phy->vchan = pchan;
+				found = phy;
+				goto out_unlock;
+			}
+		}
+	}
+
+out_unlock:
+	spin_unlock_irqrestore(&pdev->phy_lock, flags);
+	chan_dbg(pchan, "phy=%p(%d)\n", found, found ? found->idx : -1);
+
+	return found;
+}
+
+static void pxad_free_phy(struct pxad_chan *chan)
+{
+	struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device);
+	unsigned long flags;
+	u32 reg;
+
+	chan_dbg(chan, "freeing\n");
+	if (!chan->phy)
+		return;
+
+	/* clear the channel mapping in DRCMR */
+	reg = DRCMR(chan->drcmr);
+	writel_relaxed(0, chan->phy->base + reg);
+
+	spin_lock_irqsave(&pdev->phy_lock, flags);
+	chan->phy->vchan = NULL;
+	chan->phy = NULL;
+	spin_unlock_irqrestore(&pdev->phy_lock, flags);
+}
+
+static bool is_chan_running(struct pxad_chan *chan)
+{
+	u32 dcsr;
+	struct pxad_phy *phy = chan->phy;
+
+	if (!phy)
+		return false;
+	dcsr = phy_readl_relaxed(phy, DCSR);
+	return dcsr & DCSR_RUN;
+}
+
+static bool is_running_chan_misaligned(struct pxad_chan *chan)
+{
+	u32 dalgn;
+
+	BUG_ON(!chan->phy);
+	dalgn = phy_readl_relaxed(chan->phy, DALGN);
+	return dalgn & (BIT(chan->phy->idx));
+}
+
+static void phy_enable(struct pxad_phy *phy, bool misaligned)
+{
+	u32 reg, dalgn;
+
+	if (!phy->vchan)
+		return;
+
+	chan_dbg(phy->vchan, "phy=%p(%d) misaligned=%d\n",
+		 phy, phy->idx, misaligned);
+
+	reg = DRCMR(phy->vchan->drcmr);
+	writel_relaxed(DRCMR_MAPVLD | phy->idx, phy->base + reg);
+
+	dalgn = phy_readl_relaxed(phy, DALGN);
+	if (misaligned)
+		dalgn |= BIT(phy->idx);
+	else
+		dalgn &= ~BIT(phy->idx);
+	phy_writel_relaxed(phy, dalgn, DALGN);
+
+	phy_writel(phy, DCSR_STOPIRQEN | DCSR_ENDINTR | DCSR_BUSERR | DCSR_RUN,
+		    DCSR);
+}
+
+static void phy_disable(struct pxad_phy *phy)
+{
+	u32 dcsr;
+
+	if (!phy)
+		return;
+
+	dcsr = phy_readl_relaxed(phy, DCSR);
+	chan_dbg(phy->vchan, "phy=%p(%d)\n", phy, phy->idx);
+	phy_writel(phy, dcsr & ~DCSR_RUN & ~DCSR_STOPIRQEN, DCSR);
+}
+
+static void pxad_launch_chan(struct pxad_chan *chan,
+				 struct pxad_desc_sw *desc)
+{
+	chan_dbg(chan, "desc=%p\n", desc);
+	if (!chan->phy) {
+		chan->phy = lookup_phy(chan);
+		if (!chan->phy) {
+			chan_dbg(chan, "no free dma channel\n");
+			return;
+		}
+	}
+
+	/*
+	 * Program the descriptor's address into the DMA controller,
+	 * then start the DMA transaction
+	 */
+	phy_writel(chan->phy, desc->first, DDADR);
+	phy_enable(chan->phy, chan->misaligned);
+}
+
+static void set_updater_desc(struct pxad_desc_sw *sw_desc,
+			     unsigned long flags)
+{
+	struct pxad_desc_hw *updater =
+		sw_desc->hw_desc[sw_desc->nb_desc - 1];
+	dma_addr_t dma = sw_desc->hw_desc[sw_desc->nb_desc - 2]->ddadr;
+
+	updater->ddadr = DDADR_STOP;
+	updater->dsadr = dma;
+	updater->dtadr = dma + 8;
+	updater->dcmd = DCMD_WIDTH4 | DCMD_BURST32 |
+		(DCMD_LENGTH & sizeof(u32));
+	if (flags & DMA_PREP_INTERRUPT)
+		updater->dcmd |= DCMD_ENDIRQEN;
+}
+
+static bool is_desc_completed(struct virt_dma_desc *vd)
+{
+	struct pxad_desc_sw *sw_desc = to_pxad_sw_desc(vd);
+	struct pxad_desc_hw *updater =
+		sw_desc->hw_desc[sw_desc->nb_desc - 1];
+
+	return updater->dtadr != (updater->dsadr + 8);
+}
+
+static void pxad_desc_chain(struct virt_dma_desc *vd1,
+				struct virt_dma_desc *vd2)
+{
+	struct pxad_desc_sw *desc1 = to_pxad_sw_desc(vd1);
+	struct pxad_desc_sw *desc2 = to_pxad_sw_desc(vd2);
+	dma_addr_t dma_to_chain;
+
+	dma_to_chain = desc2->first;
+	desc1->hw_desc[desc1->nb_desc - 1]->ddadr = dma_to_chain;
+}
+
+static bool pxad_try_hotchain(struct virt_dma_chan *vc,
+				  struct virt_dma_desc *vd)
+{
+	struct virt_dma_desc *vd_last_issued = NULL;
+	struct pxad_chan *chan = to_pxad_chan(&vc->chan);
+
+	/*
+	 * Attempt to hot chain the tx if the phy is still running. This is
+	 * considered successful only if either the channel is still running
+	 * after the chaining, or if the chained transfer is completed after
+	 * having been hot chained.
+	 * A change of alignment is not allowed, and forbids hotchaining.
+	 */
+	if (is_chan_running(chan)) {
+		BUG_ON(list_empty(&vc->desc_issued));
+
+		if (!is_running_chan_misaligned(chan) &&
+		    to_pxad_sw_desc(vd)->misaligned)
+			return false;
+
+		vd_last_issued = list_entry(vc->desc_issued.prev,
+					    struct virt_dma_desc, node);
+		pxad_desc_chain(vd_last_issued, vd);
+		if (is_chan_running(chan) || is_desc_completed(vd_last_issued))
+			return true;
+	}
+
+	return false;
+}
+
+static unsigned int clear_chan_irq(struct pxad_phy *phy)
+{
+	u32 dcsr;
+	u32 dint = readl(phy->base + DINT);
+
+	if (!(dint & BIT(phy->idx)))
+		return DCSR_RUN;
+
+	/* clear irq */
+	dcsr = phy_readl_relaxed(phy, DCSR);
+	phy_writel(phy, dcsr, DCSR);
+	if ((dcsr & DCSR_BUSERR) && (phy->vchan))
+		chan_warn(phy->vchan, "DCSR_BUSERR\n");
+
+	return dcsr & ~DCSR_RUN;
+}
+
+static irqreturn_t pxad_chan_handler(int irq, void *dev_id)
+{
+	struct pxad_phy *phy = dev_id;
+	struct pxad_chan *chan = phy->vchan;
+	struct virt_dma_desc *vd, *tmp;
+	unsigned int dcsr;
+	unsigned long flags;
+
+	BUG_ON(!chan);
+
+	dcsr = clear_chan_irq(phy);
+	if (dcsr & DCSR_RUN)
+		return IRQ_NONE;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	list_for_each_entry_safe(vd, tmp, &chan->vc.desc_issued, node) {
+		chan_dbg(chan, "checking txd %p[%x]: completed=%d\n",
+			 vd, vd->tx.cookie, is_desc_completed(vd));
+		if (is_desc_completed(vd)) {
+			list_del(&vd->node);
+			vchan_cookie_complete(vd);
+		} else {
+			break;
+		}
+	}
+
+	if (dcsr & DCSR_STOPSTATE) {
+		chan_dbg(chan, "channel stopped, submitted_empty=%d issued_empty=%d",
+			 list_empty(&chan->vc.desc_submitted),
+			 list_empty(&chan->vc.desc_issued));
+		phy_writel_relaxed(phy, dcsr & ~DCSR_STOPIRQEN, DCSR);
+
+		if (!list_empty(&chan->vc.desc_issued)) {
+			vd = list_first_entry(&chan->vc.desc_issued,
+					      struct virt_dma_desc, node);
+			pxad_launch_chan(chan, to_pxad_sw_desc(vd));
+		} else {
+			chan->misaligned =
+				!list_empty(&chan->vc.desc_submitted);
+		}
+	}
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t pxad_int_handler(int irq, void *dev_id)
+{
+	struct pxad_device *pdev = dev_id;
+	struct pxad_phy *phy;
+	u32 dint = readl(pdev->base + DINT);
+	int i, ret = IRQ_NONE;
+
+	while (dint) {
+		i = __ffs(dint);
+		dint &= (dint - 1);
+		phy = &pdev->phys[i];
+		if (pxad_chan_handler(irq, phy) == IRQ_HANDLED)
+			ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
+static int pxad_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct pxad_chan *chan = to_pxad_chan(dchan);
+	struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device);
+
+	if (chan->desc_pool)
+		return 1;
+
+	chan->desc_pool = dma_pool_create(dma_chan_name(dchan),
+					  pdev->slave.dev,
+					  sizeof(struct pxad_desc_hw),
+					  __alignof__(struct pxad_desc_hw),
+					  0);
+	if (!chan->desc_pool) {
+		chan_err(chan, "unable to allocate descriptor pool\n");
+		return -ENOMEM;
+	}
+
+	return 1;
+}
+
+static void pxad_free_chan_resources(struct dma_chan *dchan)
+{
+	struct pxad_chan *chan = to_pxad_chan(dchan);
+
+	vchan_free_chan_resources(&chan->vc);
+	dma_pool_destroy(chan->desc_pool);
+	chan->desc_pool = NULL;
+
+}
+
+static void pxad_free_desc(struct virt_dma_desc *vd)
+{
+	int i;
+	dma_addr_t dma;
+	struct pxad_desc_sw *sw_desc = to_pxad_sw_desc(vd);
+
+	BUG_ON(sw_desc->nb_desc == 0);
+	for (i = sw_desc->nb_desc - 1; i >= 0; i--) {
+		if (i > 0)
+			dma = sw_desc->hw_desc[i - 1]->ddadr;
+		else
+			dma = sw_desc->first;
+		dma_pool_free(sw_desc->desc_pool,
+			      sw_desc->hw_desc[i], dma);
+	}
+	sw_desc->nb_desc = 0;
+	kfree(sw_desc);
+}
+
+static struct pxad_desc_sw *
+pxad_alloc_desc(struct pxad_chan *chan, unsigned int nb_hw_desc)
+{
+	struct pxad_desc_sw *sw_desc;
+	dma_addr_t dma;
+	int i;
+
+	sw_desc = kzalloc(sizeof(*sw_desc) +
+			  nb_hw_desc * sizeof(struct pxad_desc_hw *),
+			  GFP_ATOMIC);
+	if (!sw_desc) {
+		chan_err(chan, "Couldn't allocate a sw_desc\n");
+		return NULL;
+	}
+	sw_desc->desc_pool = chan->desc_pool;
+
+	for (i = 0; i < nb_hw_desc; i++) {
+		sw_desc->hw_desc[i] = dma_pool_alloc(sw_desc->desc_pool,
+						     GFP_ATOMIC, &dma);
+		if (!sw_desc->hw_desc[i]) {
+			chan_err(chan, "Couldn't allocate the %dth hw_desc from dma_pool %p\n",
+				 i, sw_desc->desc_pool);
+			goto err;
+		}
+
+		if (i == 0)
+			sw_desc->first = dma;
+		else
+			sw_desc->hw_desc[i - 1]->ddadr = dma;
+		sw_desc->nb_desc++;
+	}
+
+	return sw_desc;
+err:
+	pxad_free_desc(&sw_desc->vd);
+	return NULL;
+}
+
+static void pxad_tx_release(struct dma_async_tx_descriptor *tx)
+{
+	struct virt_dma_desc *vd = container_of(tx, struct virt_dma_desc, tx);
+	struct virt_dma_chan *vc = to_virt_chan(tx->chan);
+	struct pxad_chan *chan = to_pxad_chan(&vc->chan);
+	unsigned long flags;
+
+	chan_dbg(chan, "txd %p[%x]: freeing\n", vd, tx->cookie);
+	spin_lock_irqsave(&vc->lock, flags);
+	list_del(&vd->node);
+	spin_unlock_irqrestore(&vc->lock, flags);
+
+	pxad_free_desc(vd);
+}
+
+static dma_cookie_t pxad_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct virt_dma_chan *vc = to_virt_chan(tx->chan);
+	struct pxad_chan *chan = to_pxad_chan(&vc->chan);
+	struct virt_dma_desc *vd_chained = NULL,
+		*vd = container_of(tx, struct virt_dma_desc, tx);
+	dma_cookie_t cookie;
+	unsigned long flags;
+
+	set_updater_desc(to_pxad_sw_desc(vd), tx->flags);
+
+	spin_lock_irqsave(&vc->lock, flags);
+	cookie = dma_cookie_assign(tx);
+
+	if (list_empty(&vc->desc_submitted) && pxad_try_hotchain(vc, vd)) {
+		list_move_tail(&vd->node, &vc->desc_issued);
+		chan_dbg(chan, "txd %p[%x]: submitted (hot linked)\n",
+			 vd, cookie);
+		goto out;
+	}
+
+	/*
+	 * Fallback to placing the tx in the submitted queue
+	 */
+	if (!list_empty(&vc->desc_submitted)) {
+		vd_chained = list_entry(vc->desc_submitted.prev,
+					struct virt_dma_desc, node);
+		/*
+		 * Only chain the descriptors if no new misalignment is
+		 * introduced. If a new misalignment is chained, let the channel
+		 * stop, and be relaunched in misalign mode from the irq
+		 * handler.
+		 */
+		if (chan->misaligned || !to_pxad_sw_desc(vd)->misaligned)
+			pxad_desc_chain(vd_chained, vd);
+		else
+			vd_chained = NULL;
+	}
+	chan_dbg(chan, "txd %p[%x]: submitted (%s linked)\n",
+		 vd, cookie, vd_chained ? "cold" : "not");
+	list_move_tail(&vd->node, &vc->desc_submitted);
+	chan->misaligned |= to_pxad_sw_desc(vd)->misaligned;
+
+out:
+	spin_unlock_irqrestore(&vc->lock, flags);
+	return cookie;
+}
+
+static void pxad_issue_pending(struct dma_chan *dchan)
+{
+	struct pxad_chan *chan = to_pxad_chan(dchan);
+	struct virt_dma_desc *vd_first;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	if (list_empty(&chan->vc.desc_submitted))
+		goto out;
+
+	vd_first = list_first_entry(&chan->vc.desc_submitted,
+				    struct virt_dma_desc, node);
+	chan_dbg(chan, "txd %p[%x]", vd_first, vd_first->tx.cookie);
+
+	vchan_issue_pending(&chan->vc);
+	if (!pxad_try_hotchain(&chan->vc, vd_first))
+		pxad_launch_chan(chan, to_pxad_sw_desc(vd_first));
+out:
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+static inline struct dma_async_tx_descriptor *
+pxad_tx_prep(struct virt_dma_chan *vc, struct virt_dma_desc *vd,
+		 unsigned long tx_flags)
+{
+	struct dma_async_tx_descriptor *tx;
+
+	tx = vchan_tx_prep(vc, vd, tx_flags);
+	tx->tx_submit = pxad_tx_submit;
+	tx->tx_release = pxad_tx_release;
+	chan_dbg(container_of(vc, struct pxad_chan, vc),
+		 "vc=%p txd=%p[%x] flags=0x%lx\n", vc, vd,
+		 vd->tx.cookie, tx_flags);
+
+	return tx;
+}
+
+static struct dma_async_tx_descriptor *
+pxad_prep_memcpy(struct dma_chan *dchan,
+		     dma_addr_t dma_dst, dma_addr_t dma_src,
+		     size_t len, unsigned long flags)
+{
+	struct pxad_chan *chan = to_pxad_chan(dchan);
+	struct pxad_desc_sw *sw_desc;
+	struct pxad_desc_hw *hw_desc;
+	u32 dcmd = chan->dcmd_base;
+	unsigned int i, nb_desc = 0;
+	size_t copy;
+
+	if (!dchan || !len || chan->dir != DMA_MEM_TO_MEM)
+		return NULL;
+
+	chan_dbg(chan, "dma_dst=0x%lx dma_src=0x%lx len=%zu flags=%lx\n",
+		 (unsigned long)dma_dst, (unsigned long)dma_src, len, flags);
+	dcmd = chan->dcmd_base | DCMD_BURST32 |
+		DCMD_INCTRGADDR | DCMD_INCSRCADDR;
+
+	nb_desc = DIV_ROUND_UP(len, PDMA_MAX_DESC_BYTES);
+	sw_desc = pxad_alloc_desc(chan, nb_desc + 1);
+	if (!sw_desc)
+		return NULL;
+	sw_desc->len = len;
+
+	if (!IS_ALIGNED(dma_src, 1 << PDMA_ALIGNMENT) ||
+	    !IS_ALIGNED(dma_dst, 1 << PDMA_ALIGNMENT))
+		sw_desc->misaligned = true;
+
+	i = 0;
+	do {
+		hw_desc = sw_desc->hw_desc[i++];
+		copy = min_t(size_t, len, PDMA_MAX_DESC_BYTES);
+		hw_desc->dcmd = dcmd | (DCMD_LENGTH & copy);
+		hw_desc->dsadr = dma_src;
+		hw_desc->dtadr = dma_dst;
+		len -= copy;
+		dma_src += copy;
+		dma_dst += copy;
+	} while (len);
+	set_updater_desc(sw_desc, flags);
+
+	return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+pxad_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
+		   unsigned int sg_len, enum dma_transfer_direction dir,
+		   unsigned long flags, void *context)
+{
+	struct pxad_chan *chan = to_pxad_chan(dchan);
+	struct pxad_desc_sw *sw_desc;
+	size_t len, avail;
+	struct scatterlist *sg;
+	dma_addr_t dma;
+	u32 dcmd = chan->dcmd_base, dsadr = 0, dtadr = 0;
+	unsigned int nb_desc = 0, i, j = 0;
+
+	if ((sgl == NULL) || (sg_len == 0) || (chan->dir != dir))
+		return NULL;
+
+	chan_dbg(chan, "dir=%d flags=%lx\n", dir, flags);
+
+	switch (dir) {
+	case DMA_DEV_TO_MEM:
+		dsadr = chan->dev_addr;
+		dcmd |= DCMD_INCTRGADDR | DCMD_FLOWSRC;
+		break;
+	case DMA_MEM_TO_DEV:
+		dtadr = chan->dev_addr;
+		dcmd |= DCMD_INCSRCADDR | DCMD_FLOWTRG;
+		break;
+	default:
+		return NULL;
+	}
+
+	for_each_sg(sgl, sg, sg_len, i)
+		nb_desc += DIV_ROUND_UP(sg_dma_len(sg), PDMA_MAX_DESC_BYTES);
+	sw_desc = pxad_alloc_desc(chan, nb_desc + 1);
+	if (!sw_desc)
+		return NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		dma = sg_dma_address(sg) + sg->offset;
+		avail = sg_dma_len(sg);
+		sw_desc->len += avail;
+
+		do {
+			len = min_t(size_t, avail, PDMA_MAX_DESC_BYTES);
+			if (dma & 0x7)
+				sw_desc->misaligned = true;
+
+			sw_desc->hw_desc[j]->dcmd = dcmd | (DCMD_LENGTH & len);
+			sw_desc->hw_desc[j]->dsadr = dsadr ? dsadr : dma;
+			sw_desc->hw_desc[j++]->dtadr = dtadr ? dtadr : dma;
+
+			dma += len;
+			avail -= len;
+		} while (avail);
+	}
+	set_updater_desc(sw_desc, flags);
+
+	return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+pxad_prep_dma_cyclic(struct dma_chan *dchan,
+		     dma_addr_t buf_addr, size_t len, size_t period_len,
+		     enum dma_transfer_direction direction,
+		     unsigned long flags)
+{
+	struct pxad_chan *chan = to_pxad_chan(dchan);
+	struct pxad_desc_sw *sw_desc;
+	struct pxad_desc_hw **phw_desc;
+	dma_addr_t dma;
+	u32 dcmd = chan->dcmd_base, dsadr = 0, dtadr = 0;
+	unsigned int nb_desc = 0;
+
+	if (!dchan || !len || !period_len || chan->dir != direction)
+		return NULL;
+
+	chan_dbg(chan, "buf_addr=0x%lx len=%zu period=%zu dir=%d flags=%lx\n",
+		 (unsigned long)buf_addr, len, period_len, direction, flags);
+
+	/* the buffer length must be a multiple of period_len */
+	if (len % period_len != 0 || period_len > PDMA_MAX_DESC_BYTES ||
+	    !IS_ALIGNED(period_len, 1 << PDMA_ALIGNMENT))
+		return NULL;
+
+	switch (direction) {
+	case DMA_DEV_TO_MEM:
+		dsadr = chan->dev_addr;
+		dcmd |= DCMD_INCTRGADDR | DCMD_FLOWSRC | DCMD_ENDIRQEN |
+			(DCMD_LENGTH | period_len);
+		break;
+	case DMA_MEM_TO_DEV:
+		dtadr = chan->dev_addr;
+		dcmd |= DCMD_INCSRCADDR | DCMD_FLOWTRG | DCMD_ENDIRQEN |
+			(DCMD_LENGTH | period_len);
+		break;
+	default:
+		chan_err(chan, "Unsupported direction for cyclic DMA\n");
+		return NULL;
+	}
+
+	nb_desc = DIV_ROUND_UP(period_len, PDMA_MAX_DESC_BYTES);
+	nb_desc *= DIV_ROUND_UP(len, period_len);
+	sw_desc = pxad_alloc_desc(chan, nb_desc + 1);
+	if (!sw_desc)
+		return NULL;
+	sw_desc->cyclic = true;
+	sw_desc->len = len;
+
+	phw_desc = sw_desc->hw_desc;
+	dma = buf_addr;
+	do {
+		phw_desc[0]->dsadr = dsadr ? dsadr : dma;
+		phw_desc[0]->dtadr = dtadr ? dtadr : dma;
+		phw_desc[0]->dcmd = dcmd;
+		phw_desc++;
+		dma += period_len;
+		len -= period_len;
+	} while (len);
+	set_updater_desc(sw_desc, flags);
+
+	return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags);
+}
+
+static int pxad_config(struct dma_chan *dchan,
+		       struct dma_slave_config *cfg)
+{
+	struct pxad_chan *chan = to_pxad_chan(dchan);
+	u32 maxburst = 0, dev_addr = 0;
+	enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED;
+
+	if (!dchan)
+		return -EINVAL;
+
+	chan->dir = cfg->direction;
+	chan->dcmd_base = 0;
+
+	if (cfg->direction == DMA_DEV_TO_MEM) {
+		maxburst = cfg->src_maxburst;
+		width = cfg->src_addr_width;
+		dev_addr = cfg->src_addr;
+	} else if (cfg->direction == DMA_MEM_TO_DEV) {
+		maxburst = cfg->dst_maxburst;
+		width = cfg->dst_addr_width;
+		dev_addr = cfg->dst_addr;
+	}
+	chan_dbg(chan, "dev_addr=0x%x maxburst=%d width=%d  dir=%d\n",
+		 dev_addr, maxburst, width, chan->dir);
+
+	if (width == DMA_SLAVE_BUSWIDTH_1_BYTE)
+		chan->dcmd_base |= DCMD_WIDTH1;
+	else if (width == DMA_SLAVE_BUSWIDTH_2_BYTES)
+		chan->dcmd_base |= DCMD_WIDTH2;
+	else if (width == DMA_SLAVE_BUSWIDTH_4_BYTES)
+		chan->dcmd_base |= DCMD_WIDTH4;
+
+	if (maxburst == 8)
+		chan->dcmd_base |= DCMD_BURST8;
+	else if (maxburst == 16)
+		chan->dcmd_base |= DCMD_BURST16;
+	else if (maxburst == 32)
+		chan->dcmd_base |= DCMD_BURST32;
+
+	/* FIXME: drivers should be ported over to use the filter
+	 * function. Once that's done, the following two lines can
+	 * be removed.
+	 */
+	if (cfg->slave_id)
+		chan->drcmr = cfg->slave_id;
+	chan->dev_addr = dev_addr;
+	chan->misaligned = false;
+
+	return 0;
+}
+
+static int pxad_terminate_all(struct dma_chan *dchan)
+{
+	struct pxad_chan *chan = to_pxad_chan(dchan);
+	struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device);
+	struct virt_dma_desc *vd = NULL;
+	unsigned long flags;
+	struct pxad_phy *phy;
+	LIST_HEAD(head);
+
+	chan_dbg(chan, "vchan %p: terminate all\n", &chan->vc);
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	vchan_get_all_descriptors(&chan->vc, &head);
+
+	list_for_each_entry(vd, &head, node) {
+		chan_dbg(chan, "cancelling txd %p[%x] (completed=%d)",
+			 vd, vd->tx.cookie, is_desc_completed(vd));
+	}
+
+	phy = chan->phy;
+	if (phy) {
+		phy_disable(chan->phy);
+		pxad_free_phy(chan);
+		chan->phy = NULL;
+		spin_lock(&pdev->phy_lock);
+		phy->vchan = NULL;
+		spin_unlock(&pdev->phy_lock);
+	}
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+	vchan_dma_desc_free_list(&chan->vc, &head);
+
+	return 0;
+}
+
+static unsigned int pxad_residue(struct pxad_chan *chan,
+				 dma_cookie_t cookie)
+{
+	struct virt_dma_desc *vd = NULL;
+	struct pxad_desc_sw *sw_desc = NULL;
+	struct pxad_desc_hw *hw_desc = NULL;
+	u32 curr, start, len, end, residue = 0;
+	unsigned long flags;
+	bool passed = false, prev_completed = true;
+	int i;
+
+	/*
+	 * If the channel does not have a phy pointer anymore, it has already
+	 * been completed. Therefore, its residue is 0.
+	 */
+	if (!chan->phy)
+		return 0;
+
+	if (chan->dir == DMA_DEV_TO_MEM)
+		curr = phy_readl_relaxed(chan->phy, DTADR);
+	else
+		curr = phy_readl_relaxed(chan->phy, DSADR);
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+
+	list_for_each_entry(vd, &chan->vc.desc_issued, node) {
+		sw_desc = to_pxad_sw_desc(vd);
+
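+		/*
+		 * If the previous descriptor in the issued list is not
+		 * completed, the channel has not reached this one yet:
+		 * report the full descriptor length as residue.
+		 */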
+		if (vd->tx.cookie == cookie && !prev_completed) {
+			residue = sw_desc->len;
+			break;
+		}
+		prev_completed = is_desc_completed(vd);
+
+		if (vd->tx.cookie != cookie)
+			continue;
+
+		for (i = 0; i < sw_desc->nb_desc - 1; i++) {
+			hw_desc = sw_desc->hw_desc[i];
+			if (chan->dir == DMA_DEV_TO_MEM)
+				start = hw_desc->dtadr;
+			else
+				start = hw_desc->dsadr;
+			len = hw_desc->dcmd & DCMD_LENGTH;
+			end = start + len;
+
+			/*
+			 * 'passed' will be latched once we found the descriptor
+			 * which lies inside the boundaries of the curr
+			 * pointer. All descriptors that occur in the list
+			 * _after_ we found that partially handled descriptor
+			 * are still to be processed and are hence added to the
+			 * residual bytes counter.
+			 */
+
+			if (passed) {
+				residue += len;
+			} else if (curr >= start && curr <= end) {
+				residue += end - curr;
+				passed = true;
+			}
+		}
+
+		break;
+	}
+
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+	chan_dbg(chan, "txd %p[%x] sw_desc=%p: %d\n",
+		 vd, cookie, sw_desc, residue);
+	return residue;
+}
+
+static enum dma_status pxad_tx_status(struct dma_chan *dchan,
+				      dma_cookie_t cookie,
+				      struct dma_tx_state *txstate)
+{
+	struct pxad_chan *chan = to_pxad_chan(dchan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(dchan, cookie, txstate);
+	if (likely(ret != DMA_ERROR))
+		dma_set_residue(txstate, pxad_residue(chan, cookie));
+
+	return ret;
+}
+
+static void pxad_free_channels(struct dma_device *dmadev)
+{
+	struct pxad_chan *c, *cn;
+
+	list_for_each_entry_safe(c, cn, &dmadev->channels,
+				 vc.chan.device_node) {
+		list_del(&c->vc.chan.device_node);
+		tasklet_kill(&c->vc.task);
+	}
+}
+
+static int pxad_remove(struct platform_device *op)
+{
+	struct pxad_device *pdev = platform_get_drvdata(op);
+
+	if (op->dev.of_node)
+		of_dma_controller_free(op->dev.of_node);
+	pxad_free_channels(&pdev->slave);
+	dma_async_device_unregister(&pdev->slave);
+	return 0;
+}
+
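+/*
+ * Interrupt setup covers two wirings:
+ * - one interrupt per physical channel, each requested with the
+ *   per-channel handler pxad_chan_handler()
+ * - a single muxed interrupt for the whole controller, requested once
+ *   with the demultiplexing handler pxad_int_handler()
+ */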
+static int pxad_init_phys(struct platform_device *op,
+			  struct pxad_device *pdev,
+			  unsigned int nb_phy_chans)
+{
+	int irq0, irq, nr_irq = 0, i, ret = 0;
+	struct pxad_phy *phy;
+
+	irq0 = platform_get_irq(op, 0);
+	if (irq0 < 0)
+		return irq0;
+
+	pdev->phys = devm_kcalloc(&op->dev, nb_phy_chans,
+				  sizeof(pdev->phys[0]), GFP_KERNEL);
+	if (!pdev->phys)
+		return -ENOMEM;
+
+	for (i = 0; i < nb_phy_chans; i++)
+		if (platform_get_irq(op, i) > 0)
+			nr_irq++;
+
+	for (i = 0; i < nb_phy_chans; i++) {
+		phy = &pdev->phys[i];
+		phy->base = pdev->base;
+		phy->idx = i;
+		irq = platform_get_irq(op, i);
+		if ((nr_irq > 1) && (irq > 0))
+			ret = devm_request_irq(&op->dev, irq,
+					       pxad_chan_handler,
+					       IRQF_SHARED, "pxa-dma", phy);
+		if ((nr_irq == 1) && (i == 0))
+			ret = devm_request_irq(&op->dev, irq0,
+					       pxad_int_handler,
+					       IRQF_SHARED, "pxa-dma", pdev);
+		if (ret) {
+			pdma_err(pdev, "can't request irq %d:%d\n", irq, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static const struct of_device_id pxad_dt_ids[] = {
+	{ .compatible = "marvell,pdma-1.0", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, pxad_dt_ids);
+
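+/*
+ * The phandle arguments carry the DRCMR requestor line (args[0]) and the
+ * requested channel priority (args[1]).
+ */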
+static struct dma_chan *pxad_dma_xlate(struct of_phandle_args *dma_spec,
+					   struct of_dma *ofdma)
+{
+	struct pxad_device *d = ofdma->of_dma_data;
+	struct dma_chan *chan;
+
+	chan = dma_get_any_slave_channel(&d->slave);
+	if (!chan)
+		return NULL;
+
+	to_pxad_chan(chan)->drcmr = dma_spec->args[0];
+	to_pxad_chan(chan)->prio = dma_spec->args[1];
+
+	return chan;
+}
+
+static int pxad_init_dmadev(struct platform_device *op,
+			    struct pxad_device *pdev,
+			    unsigned int nr_phy_chans)
+{
+	int ret;
+	unsigned int i;
+	struct pxad_chan *c;
+
+	pdev->nr_chans = nr_phy_chans;
+	INIT_LIST_HEAD(&pdev->slave.channels);
+	pdev->slave.device_alloc_chan_resources = pxad_alloc_chan_resources;
+	pdev->slave.device_free_chan_resources = pxad_free_chan_resources;
+	pdev->slave.device_tx_status = pxad_tx_status;
+	pdev->slave.device_issue_pending = pxad_issue_pending;
+	pdev->slave.device_config = pxad_config;
+	pdev->slave.device_terminate_all = pxad_terminate_all;
+
+	if (op->dev.coherent_dma_mask)
+		dma_set_mask(&op->dev, op->dev.coherent_dma_mask);
+	else
+		dma_set_mask(&op->dev, DMA_BIT_MASK(32));
+
+	ret = pxad_init_phys(op, pdev, nr_phy_chans);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < nr_phy_chans; i++) {
+		c = devm_kzalloc(&op->dev, sizeof(*c), GFP_KERNEL);
+		if (!c)
+			return -ENOMEM;
+		c->vc.desc_free = pxad_free_desc;
+		vchan_init(&c->vc, &pdev->slave);
+	}
+
+	return dma_async_device_register(&pdev->slave);
+}
+
+static int pxad_probe(struct platform_device *op)
+{
+	struct pxad_device *pdev;
+	const struct of_device_id *of_id;
+	struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev);
+	struct resource *iores;
+	int ret, dma_channels = 0;
+	const enum dma_slave_buswidth widths =
+		DMA_SLAVE_BUSWIDTH_1_BYTE   | DMA_SLAVE_BUSWIDTH_2_BYTES |
+		DMA_SLAVE_BUSWIDTH_4_BYTES;
+
+	pdev = devm_kzalloc(&op->dev, sizeof(*pdev), GFP_KERNEL);
+	if (!pdev)
+		return -ENOMEM;
+
+	spin_lock_init(&pdev->phy_lock);
+
+	iores = platform_get_resource(op, IORESOURCE_MEM, 0);
+	pdev->base = devm_ioremap_resource(&op->dev, iores);
+	if (IS_ERR(pdev->base))
+		return PTR_ERR(pdev->base);
+
+	of_id = of_match_device(pxad_dt_ids, &op->dev);
+	if (of_id)
+		of_property_read_u32(op->dev.of_node, "#dma-channels",
+				     &dma_channels);
+	else if (pdata && pdata->dma_channels)
+		dma_channels = pdata->dma_channels;
+	else
+		dma_channels = 32;	/* default to 32 channels */
+
+	dma_cap_set(DMA_SLAVE, pdev->slave.cap_mask);
+	dma_cap_set(DMA_MEMCPY, pdev->slave.cap_mask);
+	dma_cap_set(DMA_CYCLIC, pdev->slave.cap_mask);
+	dma_cap_set(DMA_PRIVATE, pdev->slave.cap_mask);
+	pdev->slave.device_prep_dma_memcpy = pxad_prep_memcpy;
+	pdev->slave.device_prep_slave_sg = pxad_prep_slave_sg;
+	pdev->slave.device_prep_dma_cyclic = pxad_prep_dma_cyclic;
+
+	pdev->slave.copy_align = PDMA_ALIGNMENT;
+	pdev->slave.src_addr_widths = widths;
+	pdev->slave.dst_addr_widths = widths;
+	pdev->slave.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+	pdev->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+
+	pdev->slave.dev = &op->dev;
+	ret = pxad_init_dmadev(op, pdev, dma_channels);
+	if (ret) {
+		pdma_err(pdev, "unable to register\n");
+		return ret;
+	}
+
+	if (op->dev.of_node) {
+		/* Device-tree DMA controller registration */
+		ret = of_dma_controller_register(op->dev.of_node,
+						 pxad_dma_xlate, pdev);
+		if (ret < 0) {
+			pdma_err(pdev, "of_dma_controller_register failed\n");
+			return ret;
+		}
+	}
+
+	platform_set_drvdata(op, pdev);
+	dev_info(pdev->slave.dev, "initialized %d channels\n", dma_channels);
+	return 0;
+}
+
+static const struct platform_device_id pxad_id_table[] = {
+	{ "pxa-dma", },
+	{ },
+};
+
+static struct platform_driver pxad_driver = {
+	.driver		= {
+		.name	= "pxa-dma",
+		.of_match_table = pxad_dt_ids,
+	},
+	.id_table	= pxad_id_table,
+	.probe		= pxad_probe,
+	.remove		= pxad_remove,
+};
+
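+/*
+ * Channel filter: matches only channels of this driver, and records the
+ * requestor line and priority asked by the client. Illustrative usage
+ * from a client driver (the requestor line 24 is only an example):
+ *
+ *	dma_cap_mask_t mask;
+ *	struct pxad_param param = { .drcmr = 24, .prio = PXAD_PRIO_LOWEST };
+ *	struct dma_chan *chan;
+ *
+ *	dma_cap_zero(mask);
+ *	dma_cap_set(DMA_SLAVE, mask);
+ *	chan = dma_request_channel(mask, pxad_filter_fn, &param);
+ */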
+bool pxad_filter_fn(struct dma_chan *chan, void *param)
+{
+	struct pxad_chan *c = to_pxad_chan(chan);
+	struct pxad_param *p = param;
+
+	if (chan->device->dev->driver != &pxad_driver.driver)
+		return false;
+
+	c->drcmr = p->drcmr;
+	c->prio = p->prio;
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(pxad_filter_fn);
+
+module_platform_driver(pxad_driver);
+
+MODULE_DESCRIPTION("Marvell PXA Peripheral DMA Driver");
+MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/dma/pxa-dma.h b/include/linux/dma/pxa-dma.h
new file mode 100644
index 0000000..3edc992
--- /dev/null
+++ b/include/linux/dma/pxa-dma.h
@@ -0,0 +1,27 @@ 
+#ifndef _PXA_DMA_H_
+#define _PXA_DMA_H_
+
+enum pxad_chan_prio {
+	PXAD_PRIO_HIGHEST = 0,
+	PXAD_PRIO_NORMAL,
+	PXAD_PRIO_LOW,
+	PXAD_PRIO_LOWEST,
+};
+
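+/**
+ * struct pxad_param - parameters handed over to pxad_filter_fn()
+ * @drcmr: requestor line number of the peripheral
+ * @prio: priority requested for the channel
+ */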
+struct pxad_param {
+	unsigned int drcmr;
+	enum pxad_chan_prio prio;
+};
+
+struct dma_chan;
+
+#ifdef CONFIG_PXA_DMA
+bool pxad_filter_fn(struct dma_chan *chan, void *param);
+#else
+static inline bool pxad_filter_fn(struct dma_chan *chan, void *param)
+{
+	return false;
+}
+#endif
+
+#endif /* _PXA_DMA_H_ */