diff mbox

[5/6] dmaengine: rcar-dmac: Add Renesas R-Car Gen2 DMA Controller (DMAC) driver

Message ID 7ae1dbb4f4a79a529250600cc2eeb7c31a644938.1406766014.git.horms+renesas@verge.net.au (mailing list archive)
State Rejected
Delegated to: Vinod Koul
Headers show

Commit Message

Simon Horman July 31, 2014, 12:34 a.m. UTC
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>

The DMAC is a general purpose multi-channel DMA controller that supports
both slave and memcpy transfers.

The driver currently supports the DMAC found in the r8a7790 and r8a7791
SoCs. Support for compatible DMA controllers (such as the audio DMAC)
will be added later.

Feature-wise, automatic hardware handling of descriptors chains isn't
supported yet. LPAE support is implemented.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 drivers/dma/sh/Kconfig     |    8 +
 drivers/dma/sh/Makefile    |    1 +
 drivers/dma/sh/rcar-dmac.c | 1525 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 1534 insertions(+)
 create mode 100644 drivers/dma/sh/rcar-dmac.c

Comments

Vinod Koul July 31, 2014, 11:44 a.m. UTC | #1
On Thu, Jul 31, 2014 at 09:34:08AM +0900, Simon Horman wrote:
> From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
> 
> The DMAC is a general purpose multi-channel DMA controller that supports
> both slave and memcpy transfers.
> 
> The driver currently supports the DMAC found in the r8a7790 and r8a7791
> SoCs. Support for compatible DMA controllers (such as the audio DMAC)
> will be added later.
> 
> Feature-wise, automatic hardware handling of descriptors chains isn't
> supported yet. LPAE support is implemented.
> 
> Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
> Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
> ---

> +static int rcar_dmac_alloc_chan_resources(struct dma_chan *chan)
> +{
> +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> +	int ret;
> +
> +	INIT_LIST_HEAD(&rchan->desc.free);
> +	INIT_LIST_HEAD(&rchan->desc.pending);
> +	INIT_LIST_HEAD(&rchan->desc.active);
> +	INIT_LIST_HEAD(&rchan->desc.done);
> +	INIT_LIST_HEAD(&rchan->desc.wait);
> +	INIT_LIST_HEAD(&rchan->desc.hw_free);
> +	INIT_LIST_HEAD(&rchan->desc.pages);
Seriously we need so many lists? 1st three makes sense but what is delta b/w
done and free. Once a descriptor is done it should be moved to freee list.

What does wait list mean and the last two?


> +static struct dma_async_tx_descriptor *
> +rcar_dmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
> +			  size_t buf_len, size_t period_len,
> +			  enum dma_transfer_direction dir, unsigned long flags,
> +			  void *context)
> +{
> +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> +	struct dma_async_tx_descriptor *desc;
> +	struct scatterlist *sgl;
> +	unsigned int sg_len;
> +	unsigned int i;
> +
> +	/* Someone calling slave DMA on a generic channel? */
> +	if (rchan->mid_rid < 0 || buf_len < period_len) {
> +		dev_warn(chan->device->dev,
> +			"%s: bad parameter: buf_len=%zu, period_len=%zu, id=%d\n",
> +			__func__, buf_len, period_len, rchan->mid_rid);
> +		return NULL;
> +	}
> +
> +	sg_len = buf_len / period_len;
> +	if (sg_len > RCAR_DMAC_MAX_SG_LEN) {
> +		dev_err(chan->device->dev,
> +			"chan%u: sg length %d exceds limit %d",
> +			rchan->index, sg_len, RCAR_DMAC_MAX_SG_LEN);
> +		return NULL;
> +	}
> +
> +	/*
> +	 * Allocate the sg list dynamically as it would consumer too much stack
> +	 * space.
> +	 */
> +	sgl = kcalloc(sg_len, sizeof(*sgl), GFP_KERNEL);
GFP_NOWAIT, prepare calls can be invoked from atomic context
> +	if (!sgl)
> +		return NULL;
Wouldn't ERR_PTR help here?

> +static void rcar_dmac_slave_config(struct rcar_dmac_chan *chan,
> +				  struct dma_slave_config *cfg)
> +{
> +	/*
> +	 * We could lock this, but you shouldn't be configuring the
> +	 * channel, while using it...
> +	 */
> +
> +	if (cfg->direction == DMA_DEV_TO_MEM) {
> +		chan->slave_addr = cfg->src_addr;
> +		chan->xfer_size = cfg->src_addr_width;
> +	} else {
> +		chan->slave_addr = cfg->dst_addr;
> +		chan->xfer_size = cfg->dst_addr_width;
> +	}
okay this bit needs modification. The channel can be configured in any
direction. SO you can have one descriptor doing DMA_DEV_TO_MEM and another
DMA_MEM_TO_DEV. The prep_ calls have direction arg to be used, so please
store both :)

> +}
> +
> +static int rcar_dmac_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
> +			     unsigned long arg)
> +{
> +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> +
> +	switch (cmd) {
> +	case DMA_TERMINATE_ALL:
> +		rcar_dmac_chan_terminate_all(rchan);
> +		break;
> +
> +	case DMA_SLAVE_CONFIG:
> +		rcar_dmac_slave_config(rchan, (struct dma_slave_config *)arg);
> +		break;
> +
> +	default:
> +		return -ENXIO;
> +	}
> +
> +	return 0;
> +}
> +
> +static size_t rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
> +					 dma_cookie_t cookie)
> +{
> +	struct rcar_dmac_desc *desc = chan->desc.running;
> +	struct rcar_dmac_hw_desc *hwdesc;
> +	size_t residue = 0;
> +
> +	if (!desc)
> +		return 0;
Why?
We should be able to query even when channel is not running, right?

> +static int rcar_dmac_slave_caps(struct dma_chan *chan,
> +				struct dma_slave_caps *caps)
> +{
> +	memset(caps, 0, sizeof(*caps));
> +
> +	/*
> +	 * The device supports all widths from 1 to 64 bytes. As the
> +	 * dma_slave_buswidth enumeration is limited to 8 bytes, set the
> +	 * numerical value directly.
> +	 */
> +	caps->src_addr_widths = 0x7f;
> +	caps->dstn_addr_widths = 0x7f;
no magic numbers pls

> +	caps->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
> +	caps->cmd_terminate = true;
> +	caps->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
pls do initialize pause.

> +static irqreturn_t rcar_dmac_isr_transfer_end(struct rcar_dmac_chan *chan)
> +{
> +	struct rcar_dmac_desc *desc = chan->desc.running;
> +	struct rcar_dmac_hw_desc *hwdesc;
> +	irqreturn_t ret = IRQ_WAKE_THREAD;
> +
> +	if (WARN_ON(!desc)) {
> +		/*
> +		 * This should never happen, there should always be
> +		 * a running descriptor when a transfer ends. Warn and
> +		 * return.
> +		 */
> +		return IRQ_NONE;
> +	}
> +
> +	/*
> +	 * If we haven't completed the last hardware descriptor simply move to
> +	 * the next one. Only wake the IRQ thread if the transfer is cyclic.
> +	 */
> +	hwdesc = desc->running;
> +	if (!list_is_last(&hwdesc->node, &desc->hwdescs)) {
> +		desc->running = list_next_entry(hwdesc, node);
> +		if (!desc->cyclic)
> +			ret = IRQ_HANDLED;
> +		goto done;
> +	}
> +
> +	/*
> +	 * We've completed the last hardware. If the transfer is cyclic, move
> +	 * back to the first one.
> +	 */
> +	if (desc->cyclic) {
> +		desc->running = list_first_entry(&desc->hwdescs,
> +						 struct rcar_dmac_hw_desc,
> +						 node);
> +		goto done;
> +	}
> +
> +	/* The descriptor is complete, move it to the done list. */
> +	list_move_tail(&desc->node, &chan->desc.done);
> +	chan->desc.submitted--;
no lock protecting this one? I am sure you would be running a multi-core
system!

> +#if defined(CONFIG_PM_SLEEP) || defined(CONFIG_PM_RUNTIME)
> +static int rcar_dmac_suspend(struct device *dev)
> +{
> +	return 0;
> +}
> +
> +static int rcar_dmac_resume(struct device *dev)
> +{
> +	struct rcar_dmac *dmac = dev_get_drvdata(dev);
> +
> +	return rcar_dmac_init(dmac);
> +}
> +#endif
I dont think you need these as you are using PM macros.

> +static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
> +				struct rcar_dmac_chan *rchan,
> +				unsigned int index)
> +{
> +	struct platform_device *pdev = to_platform_device(dmac->dev);
> +	struct dma_chan *chan = &rchan->chan;
> +	char irqname[5];
> +	int irq;
> +	int ret;
> +
> +	rchan->index = index;
> +	rchan->iomem = dmac->iomem + RCAR_DMAC_CHAN_OFFSET(index);
> +	rchan->mid_rid = -EINVAL;
> +
> +	spin_lock_init(&rchan->lock);
> +	mutex_init(&rchan->power_lock);
> +
> +	/* Request the channel interrupt. */
> +	sprintf(irqname, "ch%u", index);
> +	irq = platform_get_irq_byname(pdev, irqname);
> +	if (irq < 0) {
> +		dev_err(dmac->dev, "no IRQ specified for channel %u\n", index);
> +		return -ENODEV;
> +	}
> +
> +	rchan->irqname = devm_kmalloc(dmac->dev,
> +				      strlen(dev_name(dmac->dev)) + 4,
> +				      GFP_KERNEL);
> +	if (!rchan->irqname)
> +		return -ENOMEM;
> +	sprintf(rchan->irqname, "%s:%u", dev_name(dmac->dev), index);
> +
> +	ret = devm_request_threaded_irq(dmac->dev, irq, rcar_dmac_isr_channel,
> +					rcar_dmac_isr_channel_thread, 0,
> +					rchan->irqname, rchan);
I know sh drivers do this but WHY. All other dmac drivers are happy with API
and do tasklets.

> +static int rcar_dmac_probe(struct platform_device *pdev)
> +{
> +	struct dma_device *engine;
> +	struct rcar_dmac *dmac;
> +	struct resource *mem;
> +	unsigned int i;
> +	int irq;
> +	int ret;
> +
> +	dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
> +	if (!dmac)
> +		return -ENOMEM;
> +
> +	dmac->dev = &pdev->dev;
> +	platform_set_drvdata(pdev, dmac);
> +
> +	ret = rcar_dmac_parse_of(&pdev->dev, dmac);
> +	if (ret < 0)
> +		return ret;
> +
> +	dmac->channels = devm_kcalloc(&pdev->dev, dmac->n_channels,
> +				      sizeof(*dmac->channels), GFP_KERNEL);
> +	if (!dmac->channels)
> +		return -ENOMEM;
> +
> +	/* Request resources. */
> +	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	dmac->iomem = devm_ioremap_resource(&pdev->dev, mem);
> +	if (IS_ERR(dmac->iomem))
> +		return PTR_ERR(dmac->iomem);
> +
> +	irq = platform_get_irq_byname(pdev, "error");
> +	if (irq < 0) {
> +		dev_err(&pdev->dev, "no error IRQ specified\n");
> +		return -ENODEV;
> +	}
> +
> +	dmac->irqname = devm_kmalloc(dmac->dev, strlen(dev_name(dmac->dev)) + 7,
> +				     GFP_KERNEL);
> +	if (!dmac->irqname)
> +		return -ENOMEM;
> +	sprintf(dmac->irqname, "%s:error", dev_name(&pdev->dev));
> +
> +	ret = devm_request_irq(&pdev->dev, irq, rcar_dmac_isr_error, 0,
> +			       dmac->irqname, dmac);
and you can get isr invoked after this while you are still enabling device.


> +static int rcar_dmac_remove(struct platform_device *pdev)
> +{
> +	struct rcar_dmac *dmac = platform_get_drvdata(pdev);
> +	unsigned int i;
> +
> +	of_dma_controller_free(pdev->dev.of_node);
> +	dma_async_device_unregister(&dmac->engine);
> +
> +	pm_runtime_disable(&pdev->dev);
> +
> +	for (i = 0; i < dmac->n_channels; ++i)
> +		mutex_destroy(&dmac->channels[i].power_lock);
> +
> +	return 0;
and what prevents you getting irq and all the irq threads running and
executed before you do remove. Lots of potential race conditions here!
Laurent Pinchart Aug. 4, 2014, 4:30 p.m. UTC | #2
Hi Vinod,

Thank you for the review. Please see below for a couple of questions.

On Thursday 31 July 2014 17:14:39 Vinod Koul wrote:
> On Thu, Jul 31, 2014 at 09:34:08AM +0900, Simon Horman wrote:
> > From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
> > 
> > The DMAC is a general purpose multi-channel DMA controller that supports
> > both slave and memcpy transfers.
> > 
> > The driver currently supports the DMAC found in the r8a7790 and r8a7791
> > SoCs. Support for compatible DMA controllers (such as the audio DMAC)
> > will be added later.
> > 
> > Feature-wise, automatic hardware handling of descriptors chains isn't
> > supported yet. LPAE support is implemented.
> > 
> > Signed-off-by: Laurent Pinchart
> > <laurent.pinchart+renesas@ideasonboard.com>
> > Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
> > ---
> > 
> > +static int rcar_dmac_alloc_chan_resources(struct dma_chan *chan)
> > +{
> > +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> > +	int ret;
> > +
> > +	INIT_LIST_HEAD(&rchan->desc.free);
> > +	INIT_LIST_HEAD(&rchan->desc.pending);
> > +	INIT_LIST_HEAD(&rchan->desc.active);
> > +	INIT_LIST_HEAD(&rchan->desc.done);
> > +	INIT_LIST_HEAD(&rchan->desc.wait);
> > +	INIT_LIST_HEAD(&rchan->desc.hw_free);
> > +	INIT_LIST_HEAD(&rchan->desc.pages);
> 
> Seriously we need so many lists? 1st three makes sense but what is delta b/w
> done and free. Once a descriptor is done it should be moved to freee list.
> 
> What does wait list mean and the last two?

The free list contains descriptors that have been allocated and are not used. 
Those can be descriptors preallocated and not used yet, or descriptors that 
have been used and recycled.

The pending list contains descriptors submitted (through dmaengine_submit) but 
not issued (with dma_async_issue_pending) yet.

The active list contains descriptors issued with dma_async_issue_pending. The 
list tail is or will be processed by the hardware, and the other elements in 
the list will be processed in turn.

The done list contains descriptors that have been processed by the hardware 
but haven't been processed by the interrupt thread (I'll discuss thread 
vs.tasklet below) yet. Descriptors are added to the done list in the interrupt 
handler, and taken from the list by the interrupt thread.

The wait list contains descriptors processed by the interrupt thread for which 
the completion callback has been called, but that haven't been acked (tested 
with async_tx_test_ack) yet. Descriptors are added to that list after calling 
their callback function, and the list is then processed by 
rcar_dmac_desc_recycle_acked (called both from the interrupt thread and from 
the descriptor allocation function) to move all acked descriptors back to the 
free list. 

The hw_free and pages lists are used for memory management, they don't contain 
descriptors.

Do you see any specific list that you think should be removed ? If so, how ?

> > +static struct dma_async_tx_descriptor *
> > +rcar_dmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
> > +			  size_t buf_len, size_t period_len,
> > +			  enum dma_transfer_direction dir, unsigned long flags,
> > +			  void *context)
> > +{
> > +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> > +	struct dma_async_tx_descriptor *desc;
> > +	struct scatterlist *sgl;
> > +	unsigned int sg_len;
> > +	unsigned int i;
> > +
> > +	/* Someone calling slave DMA on a generic channel? */
> > +	if (rchan->mid_rid < 0 || buf_len < period_len) {
> > +		dev_warn(chan->device->dev,
> > +			"%s: bad parameter: buf_len=%zu, period_len=%zu, id=%d\n",
> > +			__func__, buf_len, period_len, rchan->mid_rid);
> > +		return NULL;
> > +	}
> > +
> > +	sg_len = buf_len / period_len;
> > +	if (sg_len > RCAR_DMAC_MAX_SG_LEN) {
> > +		dev_err(chan->device->dev,
> > +			"chan%u: sg length %d exceds limit %d",
> > +			rchan->index, sg_len, RCAR_DMAC_MAX_SG_LEN);
> > +		return NULL;
> > +	}
> > +
> > +	/*
> > +	 * Allocate the sg list dynamically as it would consumer too much 
stack
> > +	 * space.
> > +	 */
> > +	sgl = kcalloc(sg_len, sizeof(*sgl), GFP_KERNEL);
> 
> GFP_NOWAIT, prepare calls can be invoked from atomic context

I'll rework the driver in that regard.

> > +	if (!sgl)
> > +		return NULL;
> 
> Wouldn't ERR_PTR help here?

If the callers are prepared to deal with non-NULL error values, sure. I'll let 
you guess whether that's documented :-D

> > +static void rcar_dmac_slave_config(struct rcar_dmac_chan *chan,
> > +				  struct dma_slave_config *cfg)
> > +{
> > +	/*
> > +	 * We could lock this, but you shouldn't be configuring the
> > +	 * channel, while using it...
> > +	 */
> > +
> > +	if (cfg->direction == DMA_DEV_TO_MEM) {
> > +		chan->slave_addr = cfg->src_addr;
> > +		chan->xfer_size = cfg->src_addr_width;
> > +	} else {
> > +		chan->slave_addr = cfg->dst_addr;
> > +		chan->xfer_size = cfg->dst_addr_width;
> > +	}
> 
> okay this bit needs modification. The channel can be configured in any
> direction. SO you can have one descriptor doing DMA_DEV_TO_MEM and another
> DMA_MEM_TO_DEV. The prep_ calls have direction arg to be used, so please
> store both :)

Right, but before fixing that, let's document exactly how the struct 
dma_slave_config field are supposed to be used.

The direction field is documented in the header as

 * @direction: whether the data shall go in or out on this slave
 * channel, right now. DMA_MEM_TO_DEV and DMA_DEV_TO_MEM are
 * legal values.

That seems to contradict your statement.

> > +}
> > +
> > +static int rcar_dmac_control(struct dma_chan *chan, enum dma_ctrl_cmd
> > cmd,
> > +			     unsigned long arg)
> > +{
> > +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> > +
> > +	switch (cmd) {
> > +	case DMA_TERMINATE_ALL:
> > +		rcar_dmac_chan_terminate_all(rchan);
> > +		break;
> > +
> > +	case DMA_SLAVE_CONFIG:
> > +		rcar_dmac_slave_config(rchan, (struct dma_slave_config *)arg);
> > +		break;
> > +
> > +	default:
> > +		return -ENXIO;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static size_t rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
> > +					 dma_cookie_t cookie)
> > +{
> > +	struct rcar_dmac_desc *desc = chan->desc.running;
> > +	struct rcar_dmac_hw_desc *hwdesc;
> > +	size_t residue = 0;
> > +
> > +	if (!desc)
> > +		return 0;
> 
> Why?
> We should be able to query even when channel is not running, right?

Good question, should we ? :-)

This would require going through all lists to find the descriptor 
corresponding to the cookie, and returning either 0 (if the descriptor has 
been processed completely) or the descriptor length (if it hasn't been 
processed yet). This is potentially costly.

The first case can be easily handled using the cookie only, when 
dma_cookie_status() state returns DMA_COMPLETE then we know that the residue 
is 0. This is actually the current behaviour, as dma_cookie_status() zeroes 
the residue field.

Is the second case something we need to support ? chan->desc.running is only 
NULL when there's no descriptor to process. In that case the cookie can either 
correspond to a descriptor already complete (first case), a descriptor 
prepared but not submitted or an invalid descriptor (in which case we can't 
report the residue anyway). Is there really a use case for reporting the 
residue for a descriptor not submitted ? That seems like a bad use of the API 
to me, I think it would be better to forbid it.

> > +static int rcar_dmac_slave_caps(struct dma_chan *chan,
> > +				struct dma_slave_caps *caps)
> > +{
> > +	memset(caps, 0, sizeof(*caps));
> > +
> > +	/*
> > +	 * The device supports all widths from 1 to 64 bytes. As the
> > +	 * dma_slave_buswidth enumeration is limited to 8 bytes, set the
> > +	 * numerical value directly.
> > +	 */
> > +	caps->src_addr_widths = 0x7f;
> > +	caps->dstn_addr_widths = 0x7f;
> 
> no magic numbers pls

Come on, 0x7f is hardly magic ;-)

Should I add DMA_SLAVE_BUSWIDTH_16_BYTES, DMA_SLAVE_BUSWIDTH_32_BYTES and 
DMA_SLAVE_BUSWIDTH_64_BYTES to enum dma_slave_buswidth ?

> > +	caps->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
> > +	caps->cmd_terminate = true;
> > +	caps->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
> 
> pls do initialize pause.

It's initialized by the memset above.

> > +static irqreturn_t rcar_dmac_isr_transfer_end(struct rcar_dmac_chan
> > *chan)
> > +{
> > +	struct rcar_dmac_desc *desc = chan->desc.running;
> > +	struct rcar_dmac_hw_desc *hwdesc;
> > +	irqreturn_t ret = IRQ_WAKE_THREAD;
> > +
> > +	if (WARN_ON(!desc)) {
> > +		/*
> > +		 * This should never happen, there should always be
> > +		 * a running descriptor when a transfer ends. Warn and
> > +		 * return.
> > +		 */
> > +		return IRQ_NONE;
> > +	}
> > +
> > +	/*
> > +	 * If we haven't completed the last hardware descriptor simply move 
to
> > +	 * the next one. Only wake the IRQ thread if the transfer is cyclic.
> > +	 */
> > +	hwdesc = desc->running;
> > +	if (!list_is_last(&hwdesc->node, &desc->hwdescs)) {
> > +		desc->running = list_next_entry(hwdesc, node);
> > +		if (!desc->cyclic)
> > +			ret = IRQ_HANDLED;
> > +		goto done;
> > +	}
> > +
> > +	/*
> > +	 * We've completed the last hardware. If the transfer is cyclic, move
> > +	 * back to the first one.
> > +	 */
> > +	if (desc->cyclic) {
> > +		desc->running = list_first_entry(&desc->hwdescs,
> > +						 struct rcar_dmac_hw_desc,
> > +						 node);
> > +		goto done;
> > +	}
> > +
> > +	/* The descriptor is complete, move it to the done list. */
> > +	list_move_tail(&desc->node, &chan->desc.done);
> > +	chan->desc.submitted--;
> 
> no lock protecting this one? I am sure you would be running a multi-core
> system!

The rcar_dmac_isr_transfer_end function is called from rcar_dmac_isr_channel 
with the channel spinlock held. The submitted field is also updated in 
rcar_dmac_tx_submit with the same spinlock held.

> > +#if defined(CONFIG_PM_SLEEP) || defined(CONFIG_PM_RUNTIME)
> > +static int rcar_dmac_suspend(struct device *dev)
> > +{
> > +	return 0;
> > +}
> > +
> > +static int rcar_dmac_resume(struct device *dev)
> > +{
> > +	struct rcar_dmac *dmac = dev_get_drvdata(dev);
> > +
> > +	return rcar_dmac_init(dmac);
> > +}
> > +#endif
> 
> I dont think you need these as you are using PM macros.

Then gcc will complain above functions being defined and not used.

> > +static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
> > +				struct rcar_dmac_chan *rchan,
> > +				unsigned int index)
> > +{
> > +	struct platform_device *pdev = to_platform_device(dmac->dev);
> > +	struct dma_chan *chan = &rchan->chan;
> > +	char irqname[5];
> > +	int irq;
> > +	int ret;
> > +
> > +	rchan->index = index;
> > +	rchan->iomem = dmac->iomem + RCAR_DMAC_CHAN_OFFSET(index);
> > +	rchan->mid_rid = -EINVAL;
> > +
> > +	spin_lock_init(&rchan->lock);
> > +	mutex_init(&rchan->power_lock);
> > +
> > +	/* Request the channel interrupt. */
> > +	sprintf(irqname, "ch%u", index);
> > +	irq = platform_get_irq_byname(pdev, irqname);
> > +	if (irq < 0) {
> > +		dev_err(dmac->dev, "no IRQ specified for channel %u\n", index);
> > +		return -ENODEV;
> > +	}
> > +
> > +	rchan->irqname = devm_kmalloc(dmac->dev,
> > +				      strlen(dev_name(dmac->dev)) + 4,
> > +				      GFP_KERNEL);
> > +	if (!rchan->irqname)
> > +		return -ENOMEM;
> > +	sprintf(rchan->irqname, "%s:%u", dev_name(dmac->dev), index);
> > +
> > +	ret = devm_request_threaded_irq(dmac->dev, irq, 
rcar_dmac_isr_channel,
> > +					rcar_dmac_isr_channel_thread, 0,
> > +					rchan->irqname, rchan);
> 
> I know sh drivers do this but WHY. All other dmac drivers are happy with API
> and do tasklets.

Honestly, I don't know. I've just copied that code from shdma-base.c. It was 
introduced in commit 9a7b8e002e331d0599127f16613c32f425a14f2c ("dmaengine: add 
an shdma-base library") without a specific explanation.

I can change that. What's the rationale though ? Is it just a matter of 
realtime constraints, or are there other reasons for using tasklets instead of 
threaded IRQs ?

By the way, as callbacks can prepare new descriptors, running them in a 
tasklet means I can't sleep in the prep_* functions. It seems I can't sleep 
there for other reasons anyway, but I need to fix it, and it leaves me with no 
way to properly implement runtime PM support without using a work queue. I've 
raised the issue in "DMA engine API issue" mail thread, I'd like to sort it 
out first before fixing the driver.

> > +static int rcar_dmac_probe(struct platform_device *pdev)
> > +{
> > +	struct dma_device *engine;
> > +	struct rcar_dmac *dmac;
> > +	struct resource *mem;
> > +	unsigned int i;
> > +	int irq;
> > +	int ret;
> > +
> > +	dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
> > +	if (!dmac)
> > +		return -ENOMEM;
> > +
> > +	dmac->dev = &pdev->dev;
> > +	platform_set_drvdata(pdev, dmac);
> > +
> > +	ret = rcar_dmac_parse_of(&pdev->dev, dmac);
> > +	if (ret < 0)
> > +		return ret;
> > +
> > +	dmac->channels = devm_kcalloc(&pdev->dev, dmac->n_channels,
> > +				      sizeof(*dmac->channels), GFP_KERNEL);
> > +	if (!dmac->channels)
> > +		return -ENOMEM;
> > +
> > +	/* Request resources. */
> > +	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > +	dmac->iomem = devm_ioremap_resource(&pdev->dev, mem);
> > +	if (IS_ERR(dmac->iomem))
> > +		return PTR_ERR(dmac->iomem);
> > +
> > +	irq = platform_get_irq_byname(pdev, "error");
> > +	if (irq < 0) {
> > +		dev_err(&pdev->dev, "no error IRQ specified\n");
> > +		return -ENODEV;
> > +	}
> > +
> > +	dmac->irqname = devm_kmalloc(dmac->dev, strlen(dev_name(dmac->dev)) + 
7,
> > +				     GFP_KERNEL);
> > +	if (!dmac->irqname)
> > +		return -ENOMEM;
> > +	sprintf(dmac->irqname, "%s:error", dev_name(&pdev->dev));
> > +
> > +	ret = devm_request_irq(&pdev->dev, irq, rcar_dmac_isr_error, 0,
> > +			       dmac->irqname, dmac);
> 
> and you can get isr invoked after this while you are still enabling device.

Right (although pretty unlikely). I'll reset the device first.

> > +static int rcar_dmac_remove(struct platform_device *pdev)
> > +{
> > +	struct rcar_dmac *dmac = platform_get_drvdata(pdev);
> > +	unsigned int i;
> > +
> > +	of_dma_controller_free(pdev->dev.of_node);
> > +	dma_async_device_unregister(&dmac->engine);
> > +
> > +	pm_runtime_disable(&pdev->dev);
> > +
> > +	for (i = 0; i < dmac->n_channels; ++i)
> > +		mutex_destroy(&dmac->channels[i].power_lock);
> > +
> > +	return 0;
> 
> and what prevents you getting irq and all the irq threads running and
> executed before you do remove. Lots of potential race conditions here!

Mostly the fact that the device should be quiescent when unbounded from the 
driver. If that's not the case it means that something is still using the DMA 
engine, and that's bound to cause damages anyway as the remove function will 
free all memory allocated for device structures.
Vinod Koul Aug. 5, 2014, 4:48 p.m. UTC | #3
On Mon, Aug 04, 2014 at 06:30:51PM +0200, Laurent Pinchart wrote:
> > > +static int rcar_dmac_alloc_chan_resources(struct dma_chan *chan)
> > > +{
> > > +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> > > +	int ret;
> > > +
> > > +	INIT_LIST_HEAD(&rchan->desc.free);
> > > +	INIT_LIST_HEAD(&rchan->desc.pending);
> > > +	INIT_LIST_HEAD(&rchan->desc.active);
> > > +	INIT_LIST_HEAD(&rchan->desc.done);
> > > +	INIT_LIST_HEAD(&rchan->desc.wait);
> > > +	INIT_LIST_HEAD(&rchan->desc.hw_free);
> > > +	INIT_LIST_HEAD(&rchan->desc.pages);
> > 
> > Seriously we need so many lists? 1st three makes sense but what is delta b/w
> > done and free. Once a descriptor is done it should be moved to freee list.
> > 
> > What does wait list mean and the last two?
> 
> The free list contains descriptors that have been allocated and are not used. 
> Those can be descriptors preallocated and not used yet, or descriptors that 
> have been used and recycled.
> 
> The pending list contains descriptors submitted (through dmaengine_submit) but 
> not issued (with dma_async_issue_pending) yet.
> 
> The active list contains descriptors issued with dma_async_issue_pending. The 
> list tail is or will be processed by the hardware, and the other elements in 
> the list will be processed in turn.
> 
> The done list contains descriptors that have been processed by the hardware 
> but haven't been processed by the interrupt thread (I'll discuss thread 
> vs.tasklet below) yet. Descriptors are added to the done list in the interrupt 
> handler, and taken from the list by the interrupt thread.
> 
> The wait list contains descriptors processed by the interrupt thread for which 
> the completion callback has been called, but that haven't been acked (tested 
> with async_tx_test_ack) yet. Descriptors are added to that list after calling 
> their callback function, and the list is then processed by 
> rcar_dmac_desc_recycle_acked (called both from the interrupt thread and from 
> the descriptor allocation function) to move all acked descriptors back to the 
> free list. 
Okay but do we really need the two list for transistions? The descriptors in
active_list can be marked for intermediate stages and then pushed to
free_list.

Also second question, do you submit multiple clinet request in a single go
to hardware or it is usually one descriptor?

> > okay this bit needs modification. The channel can be configured in any
> > direction. SO you can have one descriptor doing DMA_DEV_TO_MEM and another
> > DMA_MEM_TO_DEV. The prep_ calls have direction arg to be used, so please
> > store both :)
> 
> Right, but before fixing that, let's document exactly how the struct 
> dma_slave_config field are supposed to be used.
> 
> The direction field is documented in the header as
> 
>  * @direction: whether the data shall go in or out on this slave
>  * channel, right now. DMA_MEM_TO_DEV and DMA_DEV_TO_MEM are
>  * legal values.
> 
> That seems to contradict your statement.
Yes certainly, we need to fix that too :)

> > > +static size_t rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
> > > +					 dma_cookie_t cookie)
> > > +{
> > > +	struct rcar_dmac_desc *desc = chan->desc.running;
> > > +	struct rcar_dmac_hw_desc *hwdesc;
> > > +	size_t residue = 0;
> > > +
> > > +	if (!desc)
> > > +		return 0;
> > 
> > Why?
> > We should be able to query even when channel is not running, right?
> 
> Good question, should we ? :-)
> 
> This would require going through all lists to find the descriptor 
> corresponding to the cookie, and returning either 0 (if the descriptor has 
> been processed completely) or the descriptor length (if it hasn't been 
> processed yet). This is potentially costly.
Not really, the status of descriptor will tell you.

> The first case can be easily handled using the cookie only, when 
> dma_cookie_status() state returns DMA_COMPLETE then we know that the residue 
> is 0. This is actually the current behaviour, as dma_cookie_status() zeroes 
> the residue field.a
yes this is the key
> 
> Is the second case something we need to support ? chan->desc.running is only 
> NULL when there's no descriptor to process. In that case the cookie can either 
> correspond to a descriptor already complete (first case), a descriptor 
> prepared but not submitted or an invalid descriptor (in which case we can't 
> report the residue anyway). Is there really a use case for reporting the 
> residue for a descriptor not submitted ? That seems like a bad use of the API 
> to me, I think it would be better to forbid it.
So you need to check only running list. For the queued up descriptor it is
easy enough. For the one which is running you are already doing the
calculation. For completed but still not preocessed it is zero anyway

> > > +static int rcar_dmac_slave_caps(struct dma_chan *chan,
> > > +				struct dma_slave_caps *caps)
> > > +{
> > > +	memset(caps, 0, sizeof(*caps));
> > > +
> > > +	/*
> > > +	 * The device supports all widths from 1 to 64 bytes. As the
> > > +	 * dma_slave_buswidth enumeration is limited to 8 bytes, set the
> > > +	 * numerical value directly.
> > > +	 */
> > > +	caps->src_addr_widths = 0x7f;
> > > +	caps->dstn_addr_widths = 0x7f;
> > 
> > no magic numbers pls
> 
> Come on, 0x7f is hardly magic ;-)
For you yes, but down the line someone would scrath their head on why this
was 0x7f :)
> 
> Should I add DMA_SLAVE_BUSWIDTH_16_BYTES, DMA_SLAVE_BUSWIDTH_32_BYTES and 
> DMA_SLAVE_BUSWIDTH_64_BYTES to enum dma_slave_buswidth ?
Yup

> > > +#if defined(CONFIG_PM_SLEEP) || defined(CONFIG_PM_RUNTIME)
> > > +static int rcar_dmac_suspend(struct device *dev)
> > > +{
> > > +	return 0;
> > > +}
> > > +
> > > +static int rcar_dmac_resume(struct device *dev)
> > > +{
> > > +	struct rcar_dmac *dmac = dev_get_drvdata(dev);
> > > +
> > > +	return rcar_dmac_init(dmac);
> > > +}
> > > +#endif
> > 
> > I dont think you need these as you are using PM macros.
> 
> Then gcc will complain above functions being defined and not used.
oh, then we should fix in the macro itself rather than adding in driver.

> > > +static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
> > > +				struct rcar_dmac_chan *rchan,
> > > +				unsigned int index)
> > > +{
> > > +	struct platform_device *pdev = to_platform_device(dmac->dev);
> > > +	struct dma_chan *chan = &rchan->chan;
> > > +	char irqname[5];
> > > +	int irq;
> > > +	int ret;
> > > +
> > > +	rchan->index = index;
> > > +	rchan->iomem = dmac->iomem + RCAR_DMAC_CHAN_OFFSET(index);
> > > +	rchan->mid_rid = -EINVAL;
> > > +
> > > +	spin_lock_init(&rchan->lock);
> > > +	mutex_init(&rchan->power_lock);
> > > +
> > > +	/* Request the channel interrupt. */
> > > +	sprintf(irqname, "ch%u", index);
> > > +	irq = platform_get_irq_byname(pdev, irqname);
> > > +	if (irq < 0) {
> > > +		dev_err(dmac->dev, "no IRQ specified for channel %u\n", index);
> > > +		return -ENODEV;
> > > +	}
> > > +
> > > +	rchan->irqname = devm_kmalloc(dmac->dev,
> > > +				      strlen(dev_name(dmac->dev)) + 4,
> > > +				      GFP_KERNEL);
> > > +	if (!rchan->irqname)
> > > +		return -ENOMEM;
> > > +	sprintf(rchan->irqname, "%s:%u", dev_name(dmac->dev), index);
> > > +
> > > +	ret = devm_request_threaded_irq(dmac->dev, irq, 
> rcar_dmac_isr_channel,
> > > +					rcar_dmac_isr_channel_thread, 0,
> > > +					rchan->irqname, rchan);
> > 
> > I know sh drivers do this but WHY. All other dmac drivers are happy with API
> > and do tasklets.
> 
> Honestly, I don't know. I've just copied that code from shdma-base.c. It was 
> introduced in commit 9a7b8e002e331d0599127f16613c32f425a14f2c ("dmaengine: add 
> an shdma-base library") without a specific explanation.
> 
> I can change that. What's the rationale though ? Is it just a matter of 
> realtime constraints, or are there other reasons for using tasklets instead of 
> threaded IRQs ?
> 
> By the way, as callbacks can prepare new descriptors, running them in a 
> tasklet means I can't sleep in the prep_* functions. It seems I can't sleep 
> there for other reasons anyway, but I need to fix it, and it leaves me with no 
> way to properly implement runtime PM support without using a work queue. I've 
> raised the issue in "DMA engine API issue" mail thread, I'd like to sort it 
> out first before fixing the driver.
Okay lets sort that out first
Laurent Pinchart Aug. 5, 2014, 11:35 p.m. UTC | #4
Hi Vinod,

On Tuesday 05 August 2014 22:18:27 Vinod Koul wrote:
> On Mon, Aug 04, 2014 at 06:30:51PM +0200, Laurent Pinchart wrote:
> > > > +static int rcar_dmac_alloc_chan_resources(struct dma_chan *chan)
> > > > +{
> > > > +	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
> > > > +	int ret;
> > > > +
> > > > +	INIT_LIST_HEAD(&rchan->desc.free);
> > > > +	INIT_LIST_HEAD(&rchan->desc.pending);
> > > > +	INIT_LIST_HEAD(&rchan->desc.active);
> > > > +	INIT_LIST_HEAD(&rchan->desc.done);
> > > > +	INIT_LIST_HEAD(&rchan->desc.wait);
> > > > +	INIT_LIST_HEAD(&rchan->desc.hw_free);
> > > > +	INIT_LIST_HEAD(&rchan->desc.pages);
> > > 
> > > Seriously we need so many lists? 1st three makes sense but what is delta
> > > b/w done and free. Once a descriptor is done it should be moved to
> > > freee list.
> > > 
> > > What does wait list mean and the last two?
> > 
> > The free list contains descriptors that have been allocated and are not
> > used. Those can be descriptors preallocated and not used yet, or
> > descriptors that have been used and recycled.
> > 
> > The pending list contains descriptors submitted (through dmaengine_submit)
> > but not issued (with dma_async_issue_pending) yet.
> > 
> > The active list contains descriptors issued with dma_async_issue_pending.
> > The list tail is or will be processed by the hardware, and the other
> > elements in the list will be processed in turn.
> > 
> > The done list contains descriptors that have been processed by the
> > hardware but haven't been processed by the interrupt thread (I'll discuss
> > thread vs.tasklet below) yet. Descriptors are added to the done list in
> > the interrupt handler, and taken from the list by the interrupt thread.
> > 
> > The wait list contains descriptors processed by the interrupt thread for
> > which the completion callback has been called, but that haven't been
> > acked (tested with async_tx_test_ack) yet. Descriptors are added to that
> > list after calling their callback function, and the list is then
> > processed by rcar_dmac_desc_recycle_acked (called both from the interrupt
> > thread and from the descriptor allocation function) to move all acked
> > descriptors back to the free list.
> 
> Okay but do we really need the two list for transistions? The descriptors in
> active_list can be marked for intermediate stages and then pushed to
> free_list.

Using separate lists seem simpler to me, as I can minimize the amount of code 
executed with a spinlock held by using the list splice API and then iterating 
over a private list after releasing the lock, instead of iterating over the 
whole list and checking the state of every descriptor with the spinlock held. 
Feel free to prove me wrong if you can see an easy implementation that gets 
rid of the wait list though :-)

> Also second question, do you submit multiple clinet request in a single go
> to hardware or it is usually one descriptor?

It depends what you mean by descriptor. The driver is always involved to push 
the next transaction (dma_async_tx_descriptor) to the hardware. When a 
transaction is split into multiple hardware transfers, the driver creates an 
array of hardware transfer descriptors and submits it in one go (except when 
the source or destination addresses are above 4GB, in which case the hardware 
isn't able to chain transfers).

> >> okay this bit needs modification. The channel can be configured in any
> >> direction. SO you can have one descriptor doing DMA_DEV_TO_MEM and
> >> another DMA_MEM_TO_DEV. The prep_ calls have direction arg to be used,
> >> so please store both :)
> > 
> > Right, but before fixing that, let's document exactly how the struct
> > dma_slave_config field are supposed to be used.
> > 
> > The direction field is documented in the header as
> > 
> >  * @direction: whether the data shall go in or out on this slave
> >  * channel, right now. DMA_MEM_TO_DEV and DMA_DEV_TO_MEM are
> >  * legal values.
> > 
> > That seems to contradict your statement.
> 
> Yes certainly, we need to fix that too :)

I was expecting you to tell how you would like that to be fixed... What's the 
right interpretation of the dma_slave_config direction field ?

> >>> +static size_t rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
> >>> +					 dma_cookie_t cookie)
> >>> +{
> >>> +	struct rcar_dmac_desc *desc = chan->desc.running;
> >>> +	struct rcar_dmac_hw_desc *hwdesc;
> >>> +	size_t residue = 0;
> >>> +
> >>> +	if (!desc)
> >>> +		return 0;
> >> 
> >> Why?
> >> We should be able to query even when channel is not running, right?
> > 
> > Good question, should we ? :-)
> > 
> > This would require going through all lists to find the descriptor
> > corresponding to the cookie, and returning either 0 (if the descriptor has
> > been processed completely) or the descriptor length (if it hasn't been
> > processed yet). This is potentially costly.
> 
> Not really, the status of descriptor will tell you.
> 
> > The first case can be easily handled using the cookie only, when
> > dma_cookie_status() state returns DMA_COMPLETE then we know that the
> > residue is 0. This is actually the current behaviour, as
> > dma_cookie_status() zeroes the residue field.a
> 
> yes this is the key
> 
> > Is the second case something we need to support ? chan->desc.running is
> > only NULL when there's no descriptor to process. In that case the cookie
> > can either correspond to a descriptor already complete (first case), a
> > descriptor prepared but not submitted or an invalid descriptor (in which
> > case we can't report the residue anyway). Is there really a use case for
> > reporting the residue for a descriptor not submitted ? That seems like a
> > bad use of the API to me, I think it would be better to forbid it.
> 
> So you need to check only running list. For the queued up descriptor it is
> easy enough. For the one which is running you are already doing the
> calculation. For completed but still not preocessed it is zero anyway

I'm still not convinced. Reporting the residue for a descriptor that hasn't 
been started yet will require looping through lists with locks held, and I'm 
not sure to see what benefit it would bring. Do we have DMA engine users that 
retrieve (and use) the residue value of descriptors that haven't been started 
yet ?

> >>> +static int rcar_dmac_slave_caps(struct dma_chan *chan,
> >>> +				struct dma_slave_caps *caps)
> >>> +{
> >>> +	memset(caps, 0, sizeof(*caps));
> >>> +
> >>> +	/*
> >>> +	 * The device supports all widths from 1 to 64 bytes. As the
> >>> +	 * dma_slave_buswidth enumeration is limited to 8 bytes, set the
> >>> +	 * numerical value directly.
> >>> +	 */
> >>> +	caps->src_addr_widths = 0x7f;
> >>> +	caps->dstn_addr_widths = 0x7f;
> >> 
> >> no magic numbers pls
> > 
> > Come on, 0x7f is hardly magic ;-)
> 
> For you yes, but down the line someone would scrath their head on why this
> was 0x7f :)
> 
> > Should I add DMA_SLAVE_BUSWIDTH_16_BYTES, DMA_SLAVE_BUSWIDTH_32_BYTES and
> > DMA_SLAVE_BUSWIDTH_64_BYTES to enum dma_slave_buswidth ?
> 
> Yup

OK, will do.

> >>> +#if defined(CONFIG_PM_SLEEP) || defined(CONFIG_PM_RUNTIME)
> >>> +static int rcar_dmac_suspend(struct device *dev)
> >>> +{
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static int rcar_dmac_resume(struct device *dev)
> >>> +{
> >>> +	struct rcar_dmac *dmac = dev_get_drvdata(dev);
> >>> +
> >>> +	return rcar_dmac_init(dmac);
> >>> +}
> >>> +#endif
> >> 
> >> I dont think you need these as you are using PM macros.
> > 
> > Then gcc will complain above functions being defined and not used.
> 
> oh, then we should fix in the macro itself rather than adding in driver.

Fix what, the SET_SYSTEM_SLEEP_PM_OPS and SET_RUNTIME_PM_OPS macros ? How so ?

> >>> +static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
> >>> +				struct rcar_dmac_chan *rchan,
> >>> +				unsigned int index)
> >>> +{
> >>> +	struct platform_device *pdev = to_platform_device(dmac->dev);
> >>> +	struct dma_chan *chan = &rchan->chan;
> >>> +	char irqname[5];
> >>> +	int irq;
> >>> +	int ret;
> >>> +
> >>> +	rchan->index = index;
> >>> +	rchan->iomem = dmac->iomem + RCAR_DMAC_CHAN_OFFSET(index);
> >>> +	rchan->mid_rid = -EINVAL;
> >>> +
> >>> +	spin_lock_init(&rchan->lock);
> >>> +	mutex_init(&rchan->power_lock);
> >>> +
> >>> +	/* Request the channel interrupt. */
> >>> +	sprintf(irqname, "ch%u", index);
> >>> +	irq = platform_get_irq_byname(pdev, irqname);
> >>> +	if (irq < 0) {
> >>> +		dev_err(dmac->dev, "no IRQ specified for channel %u\n", index);
> >>> +		return -ENODEV;
> >>> +	}
> >>> +
> >>> +	rchan->irqname = devm_kmalloc(dmac->dev,
> >>> +				      strlen(dev_name(dmac->dev)) + 4,
> >>> +				      GFP_KERNEL);
> >>> +	if (!rchan->irqname)
> >>> +		return -ENOMEM;
> >>> +	sprintf(rchan->irqname, "%s:%u", dev_name(dmac->dev), index);
> >>> +
> >>> +	ret = devm_request_threaded_irq(dmac->dev, irq,
> > 
> > rcar_dmac_isr_channel,
> > 
> >>> +					rcar_dmac_isr_channel_thread, 0,
> >>> +					rchan->irqname, rchan);
> >> 
> >> I know sh drivers do this but WHY. All other dmac drivers are happy with
> >> API and do tasklets.
> > 
> > Honestly, I don't know. I've just copied that code from shdma-base.c. It
> > was introduced in commit 9a7b8e002e331d0599127f16613c32f425a14f2c
> > ("dmaengine: add an shdma-base library") without a specific explanation.
> > 
> > I can change that. What's the rationale though ? Is it just a matter of
> > realtime constraints, or are there other reasons for using tasklets
> > instead of threaded IRQs ?
> > 
> > By the way, as callbacks can prepare new descriptors, running them in a
> > tasklet means I can't sleep in the prep_* functions. It seems I can't
> > sleep there for other reasons anyway, but I need to fix it, and it leaves
> > me with no way to properly implement runtime PM support without using a
> > work queue. I've raised the issue in "DMA engine API issue" mail thread,
> > I'd like to sort it out first before fixing the driver.
> 
> Okay lets sort that out first

I'm all ears, please give me your opinion in a reply to that mail thread.
Geert Uytterhoeven Aug. 6, 2014, 8:46 a.m. UTC | #5
On Thu, Jul 31, 2014 at 2:34 AM, Simon Horman
<horms+renesas@verge.net.au> wrote:
> --- a/drivers/dma/sh/Kconfig
> +++ b/drivers/dma/sh/Kconfig
> @@ -52,3 +52,11 @@ config RCAR_AUDMAC_PP
>         depends on SH_DMAE_BASE
>         help
>           Enable support for the Renesas R-Car Audio DMAC Peripheral Peripheral controllers.
> +
> +config RCAR_DMAC
> +       tristate "Renesas R-Car Gen2 DMA Controller"
> +       depends on ARCH_SHMOBILE || COMPILE_TEST
> +       select DMA_ENGINE
> +       help
> +         This driver supports the general purpose DMA controller found in the
> +         Renesas R-Car second generator SoCs.

generation

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
--
To unsubscribe from this list: send the line "unsubscribe dmaengine" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Geert Uytterhoeven Aug. 6, 2014, 12:47 p.m. UTC | #6
On Thu, Jul 31, 2014 at 2:34 AM, Simon Horman
<horms+renesas@verge.net.au> wrote:
> +static struct dma_async_tx_descriptor *
> +rcar_dmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
> +                         size_t buf_len, size_t period_len,
> +                         enum dma_transfer_direction dir, unsigned long flags,
> +                         void *context)

BTW, the "context" parameter should be removed, as Laurent's independent
patch to remove it everywhere else has hit slave-dma/next"

drivers/dma/sh/rcar-dmac.c: In function 'rcar_dmac_probe':
drivers/dma/sh/rcar-dmac.c:1462:33: warning: assignment from
incompatible pointer type
  engine->device_prep_dma_cyclic = rcar_dmac_prep_dma_cyclic;
                                 ^
Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
--
To unsubscribe from this list: send the line "unsubscribe dmaengine" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Laurent Pinchart Aug. 6, 2014, 3:36 p.m. UTC | #7
Hi Geert,

On Wednesday 06 August 2014 14:47:39 Geert Uytterhoeven wrote:
> On Thu, Jul 31, 2014 at 2:34 AM, Simon Horman wrote:
> >
> > +static struct dma_async_tx_descriptor *
> > +rcar_dmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
> > +                         size_t buf_len, size_t period_len,
> > +                         enum dma_transfer_direction dir, unsigned long
> > flags, +                         void *context)
> 
> BTW, the "context" parameter should be removed, as Laurent's independent
> patch to remove it everywhere else has hit slave-dma/next"
> 
> drivers/dma/sh/rcar-dmac.c: In function 'rcar_dmac_probe':
> drivers/dma/sh/rcar-dmac.c:1462:33: warning: assignment from
> incompatible pointer type
>   engine->device_prep_dma_cyclic = rcar_dmac_prep_dma_cyclic;

Of course, it will be removed in the next version.
Vinod Koul Aug. 19, 2014, 3:54 p.m. UTC | #8
On Wed, Aug 06, 2014 at 01:35:07AM +0200, Laurent Pinchart wrote:
> > >> okay this bit needs modification. The channel can be configured in any
> > >> direction. SO you can have one descriptor doing DMA_DEV_TO_MEM and
> > >> another DMA_MEM_TO_DEV. The prep_ calls have direction arg to be used,
> > >> so please store both :)
> > > 
> > > Right, but before fixing that, let's document exactly how the struct
> > > dma_slave_config field are supposed to be used.
> > > 
> > > The direction field is documented in the header as
> > > 
> > >  * @direction: whether the data shall go in or out on this slave
> > >  * channel, right now. DMA_MEM_TO_DEV and DMA_DEV_TO_MEM are
> > >  * legal values.
> > > 
> > > That seems to contradict your statement.
> > 
> > Yes certainly, we need to fix that too :)
> 
> I was expecting you to tell how you would like that to be fixed... What's the 
> right interpretation of the dma_slave_config direction field ?
Deprecated, not to be used anymore. Pls use the direction in prep_xxx API


> > >>> +static size_t rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
> > >>> +					 dma_cookie_t cookie)
> > >>> +{
> > >>> +	struct rcar_dmac_desc *desc = chan->desc.running;
> > >>> +	struct rcar_dmac_hw_desc *hwdesc;
> > >>> +	size_t residue = 0;
> > >>> +
> > >>> +	if (!desc)
> > >>> +		return 0;
> > >> 
> > >> Why?
> > >> We should be able to query even when channel is not running, right?
> > > 
> > > Good question, should we ? :-)
> > > 
> > > This would require going through all lists to find the descriptor
> > > corresponding to the cookie, and returning either 0 (if the descriptor has
> > > been processed completely) or the descriptor length (if it hasn't been
> > > processed yet). This is potentially costly.
> > 
> > Not really, the status of descriptor will tell you.
> > 
> > > The first case can be easily handled using the cookie only, when
> > > dma_cookie_status() state returns DMA_COMPLETE then we know that the
> > > residue is 0. This is actually the current behaviour, as
> > > dma_cookie_status() zeroes the residue field.a
> > 
> > yes this is the key
> > 
> > > Is the second case something we need to support ? chan->desc.running is
> > > only NULL when there's no descriptor to process. In that case the cookie
> > > can either correspond to a descriptor already complete (first case), a
> > > descriptor prepared but not submitted or an invalid descriptor (in which
> > > case we can't report the residue anyway). Is there really a use case for
> > > reporting the residue for a descriptor not submitted ? That seems like a
> > > bad use of the API to me, I think it would be better to forbid it.
> > 
> > So you need to check only running list. For the queued up descriptor it is
> > easy enough. For the one which is running you are already doing the
> > calculation. For completed but still not preocessed it is zero anyway
> 
> I'm still not convinced. Reporting the residue for a descriptor that hasn't 
> been started yet will require looping through lists with locks held, and I'm 
> not sure to see what benefit it would bring. Do we have DMA engine users that 
> retrieve (and use) the residue value of descriptors that haven't been started 
> yet ?
well for the descriptor not started you have only one list to look.
Laurent Pinchart Aug. 20, 2014, 5:27 p.m. UTC | #9
Hi Vinod,

(CC'ing Maxime Ripard)

On Tuesday 19 August 2014 21:24:08 Vinod Koul wrote:
> On Wed, Aug 06, 2014 at 01:35:07AM +0200, Laurent Pinchart wrote:
> > > >> okay this bit needs modification. The channel can be configured in
> > > >> any direction. SO you can have one descriptor doing DMA_DEV_TO_MEM
> > > >> and another DMA_MEM_TO_DEV. The prep_ calls have direction arg to be
> > > >> used, so please store both :)
> > > > 
> > > > Right, but before fixing that, let's document exactly how the struct
> > > > dma_slave_config field are supposed to be used.
> > > > 
> > > > The direction field is documented in the header as
> > > > 
> > > >  * @direction: whether the data shall go in or out on this slave
> > > >  * channel, right now. DMA_MEM_TO_DEV and DMA_DEV_TO_MEM are
> > > >  * legal values.
> > > > 
> > > > That seems to contradict your statement.
> > > 
> > > Yes certainly, we need to fix that too :)
> > 
> > I was expecting you to tell how you would like that to be fixed... What's
> > the right interpretation of the dma_slave_config direction field ?
> 
> Deprecated, not to be used anymore. Pls use the direction in prep_xxx API

OK, that's good to know. Maxime, could you include that in the next version 
your documentation patch ?

> > > >>> +static size_t rcar_dmac_chan_get_residue(struct rcar_dmac_chan
> > > >>> *chan,
> > > >>> +					 dma_cookie_t cookie)
> > > >>> +{
> > > >>> +	struct rcar_dmac_desc *desc = chan->desc.running;
> > > >>> +	struct rcar_dmac_hw_desc *hwdesc;
> > > >>> +	size_t residue = 0;
> > > >>> +
> > > >>> +	if (!desc)
> > > >>> +		return 0;
> > > >> 
> > > >> Why?
> > > >> We should be able to query even when channel is not running, right?
> > > > 
> > > > Good question, should we ? :-)
> > > > 
> > > > This would require going through all lists to find the descriptor
> > > > corresponding to the cookie, and returning either 0 (if the descriptor
> > > > has
> > > > been processed completely) or the descriptor length (if it hasn't been
> > > > processed yet). This is potentially costly.
> > > 
> > > Not really, the status of descriptor will tell you.
> > > 
> > > > The first case can be easily handled using the cookie only, when
> > > > dma_cookie_status() state returns DMA_COMPLETE then we know that the
> > > > residue is 0. This is actually the current behaviour, as
> > > > dma_cookie_status() zeroes the residue field.a
> > > 
> > > yes this is the key
> > > 
> > > > Is the second case something we need to support ? chan->desc.running
> > > > is only NULL when there's no descriptor to process. In that case the
> > > > cookie can either correspond to a descriptor already complete (first
> > > > case), a descriptor prepared but not submitted or an invalid
> > > > descriptor (in which case we can't report the residue anyway). Is
> > > > there really a use case for reporting the residue for a descriptor not
> > > > submitted ? That seems like a bad use of the API to me, I think it
> > > > would be better to forbid it.
> > > 
> > > So you need to check only running list. For the queued up descriptor it
> > > is easy enough. For the one which is running you are already doing the
> > > calculation. For completed but still not preocessed it is zero anyway
> > 
> > I'm still not convinced. Reporting the residue for a descriptor that
> > hasn't been started yet will require looping through lists with locks
> > held, and I'm not sure to see what benefit it would bring. Do we have DMA
> > engine users that retrieve (and use) the residue value of descriptors that
> > haven't been started yet ?
> 
> well for the descriptor not started you have only one list to look.

Two of them actually, the pending (submitted but not issued) and the active 
list. But regardless of whether that would be one or two lists, it would still 
involve looping over descriptors with a lock held. If there's no use case for 
this on the caller side, I'd rather not implement dead code.
Vinod Koul Aug. 28, 2014, 7:10 a.m. UTC | #10
On Wed, Aug 20, 2014 at 07:27:32PM +0200, Laurent Pinchart wrote:

> > > > >>> +static size_t rcar_dmac_chan_get_residue(struct rcar_dmac_chan
> > > > >>> *chan,
> > > > >>> +					 dma_cookie_t cookie)
> > > > >>> +{
> > > > >>> +	struct rcar_dmac_desc *desc = chan->desc.running;
> > > > >>> +	struct rcar_dmac_hw_desc *hwdesc;
> > > > >>> +	size_t residue = 0;
> > > > >>> +
> > > > >>> +	if (!desc)
> > > > >>> +		return 0;
> > > > >> 
> > > > >> Why?
> > > > >> We should be able to query even when channel is not running, right?
> > > > > 
> > > > > Good question, should we ? :-)
> > > > > 
> > > > > This would require going through all lists to find the descriptor
> > > > > corresponding to the cookie, and returning either 0 (if the descriptor
> > > > > has
> > > > > been processed completely) or the descriptor length (if it hasn't been
> > > > > processed yet). This is potentially costly.
> > > > 
> > > > Not really, the status of descriptor will tell you.
> > > > 
> > > > > The first case can be easily handled using the cookie only, when
> > > > > dma_cookie_status() state returns DMA_COMPLETE then we know that the
> > > > > residue is 0. This is actually the current behaviour, as
> > > > > dma_cookie_status() zeroes the residue field.a
> > > > 
> > > > yes this is the key
> > > > 
> > > > > Is the second case something we need to support ? chan->desc.running
> > > > > is only NULL when there's no descriptor to process. In that case the
> > > > > cookie can either correspond to a descriptor already complete (first
> > > > > case), a descriptor prepared but not submitted or an invalid
> > > > > descriptor (in which case we can't report the residue anyway). Is
> > > > > there really a use case for reporting the residue for a descriptor not
> > > > > submitted ? That seems like a bad use of the API to me, I think it
> > > > > would be better to forbid it.
> > > > 
> > > > So you need to check only running list. For the queued up descriptor it
> > > > is easy enough. For the one which is running you are already doing the
> > > > calculation. For completed but still not preocessed it is zero anyway
> > > 
> > > I'm still not convinced. Reporting the residue for a descriptor that
> > > hasn't been started yet will require looping through lists with locks
> > > held, and I'm not sure to see what benefit it would bring. Do we have DMA
> > > engine users that retrieve (and use) the residue value of descriptors that
> > > haven't been started yet ?
> > 
> > well for the descriptor not started you have only one list to look.
> 
> Two of them actually, the pending (submitted but not issued) and the active 
> list. But regardless of whether that would be one or two lists, it would still 
> involve looping over descriptors with a lock held. If there's no use case for 
> this on the caller side, I'd rather not implement dead code.
But you can reduce that to 1 by keeping tracked of submitted desciptor
number along with completed we do now

Well its not about dead code, but about API expects, since API allows
it we should put restrictionss. If people use ut rarely then we dont need to
worry about performance on this one
Laurent Pinchart Aug. 28, 2014, 4:39 p.m. UTC | #11
Hi Vinod,

On Thursday 28 August 2014 12:40:53 Vinod Koul wrote:
> On Wed, Aug 20, 2014 at 07:27:32PM +0200, Laurent Pinchart wrote:
> >>>>>>> +static size_t rcar_dmac_chan_get_residue(struct rcar_dmac_chan
> >>>>>>> *chan,
> >>>>>>> +					 dma_cookie_t cookie)
> >>>>>>> +{
> >>>>>>> +	struct rcar_dmac_desc *desc = chan->desc.running;
> >>>>>>> +	struct rcar_dmac_hw_desc *hwdesc;
> >>>>>>> +	size_t residue = 0;
> >>>>>>> +
> >>>>>>> +	if (!desc)
> >>>>>>> +		return 0;
> >>>>>> 
> >>>>>> Why?
> >>>>>> We should be able to query even when channel is not running,
> >>>>>> right?
> >>>>> 
> >>>>> Good question, should we ? :-)
> >>>>> 
> >>>>> This would require going through all lists to find the descriptor
> >>>>> corresponding to the cookie, and returning either 0 (if the
> >>>>> descriptor has been processed completely) or the descriptor length
> >>>>> (if it hasn't been processed yet). This is potentially costly.
> >>>> 
> >>>> Not really, the status of descriptor will tell you.
> >>>>
> >>>>> The first case can be easily handled using the cookie only, when
> >>>>> dma_cookie_status() state returns DMA_COMPLETE then we know that
> >>>>> the residue is 0. This is actually the current behaviour, as
> >>>>> dma_cookie_status() zeroes the residue field.a
> >>>> 
> >>>> yes this is the key
> >>>> 
> >>>>> Is the second case something we need to support ?
> >>>>> chan->desc.running is only NULL when there's no descriptor to
> >>>>> process. In that case the cookie can either correspond to a
> >>>>> descriptor already complete (first case), a descriptor prepared but
> >>>>> not submitted or an invalid descriptor (in which case we can't
> >>>>> report the residue anyway). Is there really a use case for
> >>>>> reporting the residue for a descriptor not submitted ? That seems
> >>>>> like a bad use of the API to me, I think it would be better to
> >>>>> forbid it.
> >>>> 
> >>>> So you need to check only running list. For the queued up descriptor
> >>>> it is easy enough. For the one which is running you are already
> >>>> doing the calculation. For completed but still not preocessed it is
> >>>> zero anyway
> >>> 
> >>> I'm still not convinced. Reporting the residue for a descriptor that
> >>> hasn't been started yet will require looping through lists with locks
> >>> held, and I'm not sure to see what benefit it would bring. Do we have
> >>> DMA engine users that retrieve (and use) the residue value of
> >>> descriptors that haven't been started yet ?
> >> 
> >> well for the descriptor not started you have only one list to look.
> > 
> > Two of them actually, the pending (submitted but not issued) and the
> > active list. But regardless of whether that would be one or two lists, it
> > would still involve looping over descriptors with a lock held. If there's
> > no use case for this on the caller side, I'd rather not implement dead
> > code.
> 
> But you can reduce that to 1 by keeping tracked of submitted desciptor
> number along with completed we do now
> 
> Well its not about dead code, but about API expects, since API allows
> it we should put restrictionss. If people use ut rarely then we dont need to
> worry about performance on this one.

My point is twofold.

First, the residue reporting API isn't documented, and drivers implementing it 
exhibit various behaviours. We thus need to agree on a standard behaviour, 
document it, and over time fix the DMA engine drivers (and possibly their 
users) to implement the standard behaviour. This effectively puts us in an API 
design stage.

Then, to design the residue reporting API, we should take into account both 
the potential use of each features by the DMA engine users, and the complexity 
of their implementations in the DMA engine drivers. I have the impression that 
there is little use for querying the residue of a descriptor that hasn't been 
started. I would thus be inclined not to include that feature in the API, 
especially as adding it later would be easy, while removing it would be more 
complex.

Of course, if there are real use cases on the DMA engine user side for such an 
API, I have no problem implementing it.
diff mbox

Patch

diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig
index 0349125..aee59ed 100644
--- a/drivers/dma/sh/Kconfig
+++ b/drivers/dma/sh/Kconfig
@@ -52,3 +52,11 @@  config RCAR_AUDMAC_PP
 	depends on SH_DMAE_BASE
 	help
 	  Enable support for the Renesas R-Car Audio DMAC Peripheral Peripheral controllers.
+
+config RCAR_DMAC
+	tristate "Renesas R-Car Gen2 DMA Controller"
+	depends on ARCH_SHMOBILE || COMPILE_TEST
+	select DMA_ENGINE
+	help
+	  This driver supports the general purpose DMA controller found in the
+	  Renesas R-Car second generator SoCs.
diff --git a/drivers/dma/sh/Makefile b/drivers/dma/sh/Makefile
index 0a5cfdb..2852f9d 100644
--- a/drivers/dma/sh/Makefile
+++ b/drivers/dma/sh/Makefile
@@ -16,3 +16,4 @@  obj-$(CONFIG_SH_DMAE) += shdma.o
 obj-$(CONFIG_SUDMAC) += sudmac.o
 obj-$(CONFIG_RCAR_HPB_DMAE) += rcar-hpbdma.o
 obj-$(CONFIG_RCAR_AUDMAC_PP) += rcar-audmapp.o
+obj-$(CONFIG_RCAR_DMAC) += rcar-dmac.o
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
new file mode 100644
index 0000000..45ae37e
--- /dev/null
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -0,0 +1,1525 @@ 
+/*
+ * Renesas R-Car Gen2 DMA Controller Driver
+ *
+ * Copyright (C) 2014 Renesas Electronics Inc.
+ *
+ * Author: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "../dmaengine.h"
+
+/*
+ * struct rcar_dmac_hw_desc - Descriptor for a single hardware transfer
+ * @node: entry in the parent's hwdesc list
+ * @src_addr: device source address
+ * @dst_addr: device destination address
+ * @size: transfer size in bytes
+ */
+struct rcar_dmac_hw_desc {
+	struct list_head node;
+
+	dma_addr_t src_addr;
+	dma_addr_t dst_addr;
+	u32 size;
+};
+
+/*
+ * struct rcar_dmac_desc - R-Car Gen2 DMA Transfer Descriptor
+ * @async_tx: base DMA asynchronous transaction descriptor
+ * @direction: direction of the DMA transfer
+ * @xfer_shift: log2 of the transfer size
+ * @chcr: value of the channel configuration register for this transfer
+ * @node: entry in the channel's descriptors lists
+ * @hwdescs: list of hardware descriptors for this transfer
+ * @running: the hardware transfer being currently processed
+ * @size: transfer size in bytes
+ * @cyclic: when set indicates that the DMA transfer is cyclic
+ */
+struct rcar_dmac_desc {
+	struct dma_async_tx_descriptor async_tx;
+	enum dma_transfer_direction direction;
+	unsigned int xfer_shift;
+	u32 chcr;
+
+	struct list_head node;
+	struct list_head hwdescs;
+	struct rcar_dmac_hw_desc *running;
+
+	size_t size;
+	bool cyclic;
+};
+
+#define to_rcar_dmac_desc(d)	container_of(d, struct rcar_dmac_desc, async_tx)
+
+/*
+ * struct rcar_dmac_desc_page - One page worth of descriptors
+ * @node: entry in the channel's pages list
+ * @descs: array of DMA descriptors
+ * @hwdescs: array of hardware descriptors
+ */
+struct rcar_dmac_desc_page {
+	struct list_head node;
+
+	union {
+		struct rcar_dmac_desc descs[0];
+		struct rcar_dmac_hw_desc hwdescs[0];
+	};
+};
+
+#define RCAR_DMAC_DESCS_PER_PAGE					\
+	((PAGE_SIZE - offsetof(struct rcar_dmac_desc_page, descs)) /	\
+	sizeof(struct rcar_dmac_desc))
+#define RCAR_DMAC_HWDESCS_PER_PAGE					\
+	((PAGE_SIZE - offsetof(struct rcar_dmac_desc_page, hwdescs)) /	\
+	sizeof(struct rcar_dmac_hw_desc))
+
+/*
+ * struct rcar_dmac_chan - R-Car Gen2 DMA Controller Channel
+ * @chan: base DMA channel object
+ * @iomem: channel I/O memory base
+ * @index: index of this channel in the controller
+ * @irqname: name of the channel IRQ
+ * @xfer_size: size (in bytes) of individual hardware transfers
+ * @slave_addr: slave source or destination memory address
+ * @mid_rid: hardware MID/RID for the DMA client using this channel
+ * @lock: protects the channel CHCR register and the desc members
+ * @power_lock: protects the submitted counter for pm_runtime_get_sync
+ * @desc.free: list of free descriptors
+ * @desc.pending: list of pending descriptors (submitted with tx_submit)
+ * @desc.active: list of active descriptors (activated with issue_pending)
+ * @desc.done: list of completed descriptors
+ * @desc.wait: list of descriptors waiting for an ack
+ * @desc.running: the descriptor being processed (a member of the active list)
+ * @desc.submitted: number of descriptors submitted and not complete yet
+ * @desc.hw_free: list of free hardware descriptors
+ * @desc.pages: list of pages used by allocated descriptors
+ */
+struct rcar_dmac_chan {
+	struct dma_chan chan;
+	void __iomem *iomem;
+	unsigned int index;
+	char *irqname;
+
+	unsigned int xfer_size;
+	dma_addr_t slave_addr;
+	int mid_rid;
+
+	spinlock_t lock;
+	struct mutex power_lock;
+
+	struct {
+		struct list_head free;
+		struct list_head pending;
+		struct list_head active;
+		struct list_head done;
+		struct list_head wait;
+		struct rcar_dmac_desc *running;
+		unsigned int submitted;
+
+		struct list_head hw_free;
+
+		struct list_head pages;
+	} desc;
+};
+
+#define to_rcar_dmac_chan(c)	container_of(c, struct rcar_dmac_chan, chan)
+
+/*
+ * struct rcar_dmac - R-Car Gen2 DMA Controller
+ * @engine: base DMA engine object
+ * @dev: the hardware device
+ * @iomem: remapped I/O memory base
+ * @irqname: name of the error IRQ
+ * @n_channels: number of available channels
+ * @channels: array of DMAC channels
+ * @modules: bitmask of client modules in use
+ */
+struct rcar_dmac {
+	struct dma_device engine;
+	struct device *dev;
+	void __iomem *iomem;
+	char *irqname;
+
+	unsigned int n_channels;
+	struct rcar_dmac_chan *channels;
+
+	unsigned long modules[256 / BITS_PER_LONG];
+};
+
+#define to_rcar_dmac(d)		container_of(d, struct rcar_dmac, engine)
+
+/* -----------------------------------------------------------------------------
+ * Registers
+ */
+
+#define RCAR_DMAC_CHAN_OFFSET(i)	(0x8000 + 0x80 * (i))
+
+#define RCAR_DMAISTA			0x0020
+#define RCAR_DMASEC			0x0030
+#define RCAR_DMAOR			0x0060
+#define RCAR_DMAOR_PRI_FIXED		(0 << 8)
+#define RCAR_DMAOR_PRI_ROUND_ROBIN	(3 << 8)
+#define RCAR_DMAOR_AE			(1 << 2)
+#define RCAR_DMAOR_DME			(1 << 0)
+#define RCAR_DMACHCLR			0x0080
+#define RCAR_DMADPSEC			0x00a0
+
+#define RCAR_DMASAR			0x0000
+#define RCAR_DMADAR			0x0004
+#define RCAR_DMATCR			0x0008
+#define RCAR_DMATCR_MASK		0x00ffffff
+#define RCAR_DMATSR			0x0028
+#define RCAR_DMACHCR			0x000c
+#define RCAR_DMACHCR_CAE		(1 << 31)
+#define RCAR_DMACHCR_CAIE		(1 << 30)
+#define RCAR_DMACHCR_DPM_DISABLED	(0 << 28)
+#define RCAR_DMACHCR_DPM_ENABLED	(1 << 28)
+#define RCAR_DMACHCR_DPM_REPEAT		(2 << 28)
+#define RCAR_DMACHCR_DPM_INFINIE	(3 << 28)
+#define RCAR_DMACHCR_RPT_SAR		(1 << 27)
+#define RCAR_DMACHCR_RPT_DAR		(1 << 26)
+#define RCAR_DMACHCR_RPT_TCR		(1 << 25)
+#define RCAR_DMACHCR_DPB		(1 << 22)
+#define RCAR_DMACHCR_DSE		(1 << 19)
+#define RCAR_DMACHCR_DSIE		(1 << 18)
+#define RCAR_DMACHCR_TS_1B		((0 << 20) | (0 << 3))
+#define RCAR_DMACHCR_TS_2B		((0 << 20) | (1 << 3))
+#define RCAR_DMACHCR_TS_4B		((0 << 20) | (2 << 3))
+#define RCAR_DMACHCR_TS_16B		((0 << 20) | (3 << 3))
+#define RCAR_DMACHCR_TS_32B		((1 << 20) | (0 << 3))
+#define RCAR_DMACHCR_TS_64B		((1 << 20) | (1 << 3))
+#define RCAR_DMACHCR_TS_8B		((1 << 20) | (3 << 3))
+#define RCAR_DMACHCR_DM_FIXED		(0 << 14)
+#define RCAR_DMACHCR_DM_INC		(1 << 14)
+#define RCAR_DMACHCR_DM_DEC		(2 << 14)
+#define RCAR_DMACHCR_SM_FIXED		(0 << 12)
+#define RCAR_DMACHCR_SM_INC		(1 << 12)
+#define RCAR_DMACHCR_SM_DEC		(2 << 12)
+#define RCAR_DMACHCR_RS_AUTO		(4 << 8)
+#define RCAR_DMACHCR_RS_DMARS		(8 << 8)
+#define RCAR_DMACHCR_IE			(1 << 2)
+#define RCAR_DMACHCR_TE			(1 << 1)
+#define RCAR_DMACHCR_DE			(1 << 0)
+#define RCAR_DMATCRB			0x0018
+#define RCAR_DMATSRB			0x0038
+#define RCAR_DMACHCRB			0x001c
+#define RCAR_DMACHCRB_DCNT(n)		((n) << 24)
+#define RCAR_DMACHCRB_DPTR(n)		((n) << 16)
+#define RCAR_DMACHCRB_DRST		(1 << 15)
+#define RCAR_DMACHCRB_DTS		(1 << 8)
+#define RCAR_DMACHCRB_SLM_NORMAL	(0 << 4)
+#define RCAR_DMACHCRB_SLM_CLK(n)	((8 | (n)) << 4)
+#define RCAR_DMACHCRB_PRI(n)		((n) << 0)
+#define RCAR_DMARS			0x0040
+#define RCAR_DMABUFCR			0x0048
+#define RCAR_DMABUFCR_MBU(n)		((n) << 16)
+#define RCAR_DMABUFCR_ULB(n)		((n) << 0)
+#define RCAR_DMADPBASE			0x0050
+#define RCAR_DMADPBASE_MASK		0xfffffff0
+#define RCAR_DMADPBASE_SEL		(1 << 0)
+#define RCAR_DMADPCR			0x0054
+#define RCAR_DMADPCR_DIPT(n)		((n) << 24)
+#define RCAR_DMAFIXSAR			0x0010
+#define RCAR_DMAFIXDAR			0x0014
+#define RCAR_DMAFIXDPBASE		0x0060
+
+/* Hardcode the MEMCPY transfer size to 4 bytes. */
+#define RCAR_DMAC_MEMCPY_XFER_SIZE	4
+#define RCAR_DMAC_MAX_XFER_LEN		(RCAR_DMATCR_MASK + 1)
+
+/* -----------------------------------------------------------------------------
+ * Device access
+ */
+
+static void rcar_dmac_write(struct rcar_dmac *dmac, u32 reg, u32 data)
+{
+	if (reg == RCAR_DMAOR)
+		writew(data, dmac->iomem + reg);
+	else
+		writel(data, dmac->iomem + reg);
+}
+
+static u32 rcar_dmac_read(struct rcar_dmac *dmac, u32 reg)
+{
+	if (reg == RCAR_DMAOR)
+		return readw(dmac->iomem + reg);
+	else
+		return readl(dmac->iomem + reg);
+}
+
+static u32 rcar_dmac_chan_read(struct rcar_dmac_chan *chan, u32 reg)
+{
+	if (reg == RCAR_DMARS)
+		return readw(chan->iomem + reg);
+	else
+		return readl(chan->iomem + reg);
+}
+
+static void rcar_dmac_chan_write(struct rcar_dmac_chan *chan, u32 reg, u32 data)
+{
+	if (reg == RCAR_DMARS)
+		writew(data, chan->iomem + reg);
+	else
+		writel(data, chan->iomem + reg);
+}
+
+/* -----------------------------------------------------------------------------
+ * Initialization and configuration
+ */
+
+static bool rcar_dmac_chan_is_busy(struct rcar_dmac_chan *chan)
+{
+	u32 chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
+
+	return (chcr & (RCAR_DMACHCR_DE | RCAR_DMACHCR_TE)) == RCAR_DMACHCR_DE;
+}
+
+static void rcar_dmac_chan_start_xfer(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc *desc = chan->desc.running;
+	struct rcar_dmac_hw_desc *hwdesc = desc->running;
+
+	dev_dbg(chan->chan.device->dev,
+		"chan%u: queue hwdesc %p: %u@%pad -> %pad\n",
+		chan->index, hwdesc, hwdesc->size, &hwdesc->src_addr,
+		&hwdesc->dst_addr);
+
+	WARN_ON_ONCE(rcar_dmac_chan_is_busy(chan));
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	rcar_dmac_chan_write(chan, RCAR_DMAFIXSAR, hwdesc->src_addr >> 32);
+	rcar_dmac_chan_write(chan, RCAR_DMAFIXDAR, hwdesc->dst_addr >> 32);
+#endif
+	rcar_dmac_chan_write(chan, RCAR_DMASAR, hwdesc->src_addr & 0xffffffff);
+	rcar_dmac_chan_write(chan, RCAR_DMADAR, hwdesc->dst_addr & 0xffffffff);
+
+	if (chan->mid_rid >= 0)
+		rcar_dmac_chan_write(chan, RCAR_DMARS, chan->mid_rid);
+
+	rcar_dmac_chan_write(chan, RCAR_DMATCR,
+			     hwdesc->size >> desc->xfer_shift);
+
+	rcar_dmac_chan_write(chan, RCAR_DMACHCR, desc->chcr | RCAR_DMACHCR_DE |
+			     RCAR_DMACHCR_IE);
+}
+
+static int rcar_dmac_init(struct rcar_dmac *dmac)
+{
+	u16 dmaor;
+
+	/* Clear all channels and enable the DMAC globally. */
+	rcar_dmac_write(dmac, RCAR_DMACHCLR, 0x7fff);
+	rcar_dmac_write(dmac, RCAR_DMAOR,
+			RCAR_DMAOR_PRI_FIXED | RCAR_DMAOR_DME);
+
+	dmaor = rcar_dmac_read(dmac, RCAR_DMAOR);
+	if ((dmaor & (RCAR_DMAOR_AE | RCAR_DMAOR_DME)) != RCAR_DMAOR_DME) {
+		dev_warn(dmac->dev, "DMAOR initialization failed.\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors submission
+ */
+
+static dma_cookie_t rcar_dmac_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct rcar_dmac_chan *chan = to_rcar_dmac_chan(tx->chan);
+	struct rcar_dmac_desc *desc = to_rcar_dmac_desc(tx);
+	unsigned long flags;
+	dma_cookie_t cookie;
+	bool resume;
+
+	mutex_lock(&chan->power_lock);
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	cookie = dma_cookie_assign(tx);
+
+	dev_dbg(chan->chan.device->dev, "chan%u: submit #%d@%p\n",
+		chan->index, tx->cookie, desc);
+
+	list_add_tail(&desc->node, &chan->desc.pending);
+	desc->running = list_first_entry(&desc->hwdescs,
+					 struct rcar_dmac_hw_desc, node);
+
+	/* Resume the device when submitting the first descriptor. */
+	resume = chan->desc.submitted++ == 0;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	if (resume)
+		pm_runtime_get_sync(chan->chan.device->dev);
+
+	mutex_unlock(&chan->power_lock);
+
+	return cookie;
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors allocation and free
+ */
+
+/*
+ * rcar_dmac_desc_alloc - Allocate a page worth of DMA descriptors
+ * @chan: the DMA channel
+ */
+static int rcar_dmac_desc_alloc(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc_page *page;
+	LIST_HEAD(list);
+	unsigned int i;
+
+	page = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+
+	for (i = 0; i < RCAR_DMAC_DESCS_PER_PAGE; ++i) {
+		struct rcar_dmac_desc *desc = &page->descs[i];
+
+		dma_async_tx_descriptor_init(&desc->async_tx, &chan->chan);
+		desc->async_tx.tx_submit = rcar_dmac_tx_submit;
+		INIT_LIST_HEAD(&desc->hwdescs);
+
+		list_add_tail(&desc->node, &list);
+	}
+
+	spin_lock_irq(&chan->lock);
+	list_splice_tail(&list, &chan->desc.free);
+	list_add_tail(&page->node, &chan->desc.pages);
+	spin_unlock_irq(&chan->lock);
+
+	return 0;
+}
+
+/*
+ * rcar_dmac_desc_put - Release a DMA transfer descriptor
+ * @chan: the DMA channel
+ * @desc: the descriptor
+ *
+ * Put the descriptor and its hardware descriptors back in the channel's free
+ * descriptors lists. The descriptor's hwdesc will be reinitialized to an empty
+ * list as a result.
+ *
+ * The descriptor must have been removed from the channel's done list before
+ * calling this function.
+ *
+ * Locking: Must be called with the channel lock held.
+ */
+static void rcar_dmac_desc_put(struct rcar_dmac_chan *chan,
+			       struct rcar_dmac_desc *desc)
+{
+	list_splice_tail_init(&desc->hwdescs, &chan->desc.hw_free);
+	list_add_tail(&desc->node, &chan->desc.free);
+}
+
+static void rcar_dmac_desc_recycle_acked(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc *desc, *_desc;
+
+	list_for_each_entry_safe(desc, _desc, &chan->desc.wait, node) {
+		if (async_tx_test_ack(&desc->async_tx)) {
+			list_del(&desc->node);
+			rcar_dmac_desc_put(chan, desc);
+		}
+	}
+}
+
+/*
+ * rcar_dmac_desc_get - Allocate a descriptor for a DMA transfer
+ * @chan: the DMA channel
+ *
+ * Locking: This function must be called in a non-atomic context.
+ *
+ * Return: A pointer to the allocated descriptor or NULL if no descriptor can
+ * be allocated.
+ */
+static struct rcar_dmac_desc *rcar_dmac_desc_get(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc *desc;
+	int ret;
+
+	spin_lock_irq(&chan->lock);
+
+	/* Recycle acked descriptors before attempting allocation. */
+	rcar_dmac_desc_recycle_acked(chan);
+
+	do {
+		if (list_empty(&chan->desc.free)) {
+			/*
+			 * No free descriptors, allocate a page worth of them
+			 * and try again, as someone else could race us to get
+			 * the newly allocated descriptors. If the allocation
+			 * fails return an error.
+			 */
+			spin_unlock_irq(&chan->lock);
+			ret = rcar_dmac_desc_alloc(chan);
+			if (ret < 0)
+				return NULL;
+			spin_lock_irq(&chan->lock);
+			continue;
+		}
+
+		desc = list_first_entry(&chan->desc.free, struct rcar_dmac_desc,
+					node);
+		list_del(&desc->node);
+	} while (!desc);
+
+	spin_unlock_irq(&chan->lock);
+
+	return desc;
+}
+
+/*
+ * rcar_dmac_hw_desc_alloc - Allocate a page worth of hardware descriptors
+ * @chan: the DMA channel
+ */
+static int rcar_dmac_hw_desc_alloc(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc_page *page;
+	LIST_HEAD(list);
+	unsigned int i;
+
+	page = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+
+	for (i = 0; i < RCAR_DMAC_HWDESCS_PER_PAGE; ++i) {
+		struct rcar_dmac_hw_desc *hwdesc = &page->hwdescs[i];
+
+		list_add_tail(&hwdesc->node, &list);
+	}
+
+	spin_lock_irq(&chan->lock);
+	list_splice_tail(&list, &chan->desc.hw_free);
+	list_add_tail(&page->node, &chan->desc.pages);
+	spin_unlock_irq(&chan->lock);
+
+	return 0;
+}
+
+/*
+ * rcar_dmac_hw_desc_get - Allocate a hardware descriptor for a DMA transfer
+ * @chan: the DMA channel
+ *
+ * Locking: This function must be called in a non-atomic context.
+ *
+ * Return: A pointer to the allocated hardware descriptor or NULL if no
+ * descriptor can be allocated.
+ */
+static struct rcar_dmac_hw_desc *
+rcar_dmac_hw_desc_get(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_hw_desc *hwdesc;
+	int ret;
+
+	spin_lock_irq(&chan->lock);
+
+	do {
+		if (list_empty(&chan->desc.hw_free)) {
+			/*
+			 * No free descriptors, allocate a page worth of them
+			 * and try again, as someone else could race us to get
+			 * the newly allocated descriptors. If the allocation
+			 * fails return an error.
+			 */
+			spin_unlock_irq(&chan->lock);
+			ret = rcar_dmac_hw_desc_alloc(chan);
+			if (ret < 0)
+				return NULL;
+			spin_lock_irq(&chan->lock);
+			continue;
+		}
+
+		hwdesc = list_first_entry(&chan->desc.hw_free,
+					  struct rcar_dmac_hw_desc, node);
+		list_del(&hwdesc->node);
+	} while (!hwdesc);
+
+	spin_unlock_irq(&chan->lock);
+
+	return hwdesc;
+}
+
+/* -----------------------------------------------------------------------------
+ * Stop and reset
+ */
+
+static void rcar_dmac_chan_halt(struct rcar_dmac_chan *chan)
+{
+	u32 chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
+
+	chcr &= ~(RCAR_DMACHCR_IE | RCAR_DMACHCR_TE | RCAR_DMACHCR_DE);
+	rcar_dmac_chan_write(chan, RCAR_DMACHCR, chcr);
+}
+
+static void rcar_dmac_chan_reinit(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc *desc, *_desc;
+	unsigned long flags;
+	LIST_HEAD(descs);
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	/* Move all non-free descriptors to the local lists. */
+	list_splice_init(&chan->desc.pending, &descs);
+	list_splice_init(&chan->desc.active, &descs);
+	list_splice_init(&chan->desc.done, &descs);
+	list_splice_init(&chan->desc.wait, &descs);
+
+	/* Suspend the device if it was running. */
+	if (chan->desc.submitted)
+		pm_runtime_put(chan->chan.device->dev);
+
+	chan->desc.running = NULL;
+	chan->desc.submitted = 0;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	list_for_each_entry_safe(desc, _desc, &descs, node) {
+		list_del(&desc->node);
+		rcar_dmac_desc_put(chan, desc);
+	}
+}
+
+static void rcar_dmac_chan_terminate_all(struct rcar_dmac_chan *chan)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	rcar_dmac_chan_halt(chan);
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	/*
+	 * FIXME: No new interrupt can occur now, but the IRQ thread might still
+	 * be running.
+	 */
+
+	rcar_dmac_chan_reinit(chan);
+}
+
+static void rcar_dmac_stop(struct rcar_dmac *dmac)
+{
+	rcar_dmac_write(dmac, RCAR_DMAOR, 0);
+}
+
+static void rcar_dmac_abort(struct rcar_dmac *dmac)
+{
+	unsigned int i;
+
+	/* Stop all channels. */
+	for (i = 0; i < dmac->n_channels; ++i) {
+		struct rcar_dmac_chan *chan = &dmac->channels[i];
+
+		/* Stop and reinitialize the channel. */
+		spin_lock(&chan->lock);
+		rcar_dmac_chan_halt(chan);
+		spin_unlock(&chan->lock);
+
+		rcar_dmac_chan_reinit(chan);
+	}
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors preparation
+ */
+
+static void rcar_dmac_chan_configure_desc(struct rcar_dmac_chan *chan,
+					  struct rcar_dmac_desc *desc)
+{
+	static const u32 chcr_ts[] = {
+		RCAR_DMACHCR_TS_1B, RCAR_DMACHCR_TS_2B,
+		RCAR_DMACHCR_TS_4B, RCAR_DMACHCR_TS_8B,
+		RCAR_DMACHCR_TS_16B, RCAR_DMACHCR_TS_32B,
+		RCAR_DMACHCR_TS_64B,
+	};
+
+	unsigned int xfer_size;
+	u32 chcr;
+
+	switch (desc->direction) {
+	case DMA_DEV_TO_MEM:
+		chcr = RCAR_DMACHCR_DM_INC | RCAR_DMACHCR_SM_FIXED
+		     | RCAR_DMACHCR_RS_DMARS;
+		xfer_size = chan->xfer_size;
+		break;
+
+	case DMA_MEM_TO_DEV:
+		chcr = RCAR_DMACHCR_DM_FIXED | RCAR_DMACHCR_SM_INC
+		     | RCAR_DMACHCR_RS_DMARS;
+		xfer_size = chan->xfer_size;
+		break;
+
+	case DMA_MEM_TO_MEM:
+	default:
+		chcr = RCAR_DMACHCR_DM_INC | RCAR_DMACHCR_SM_INC
+		     | RCAR_DMACHCR_RS_AUTO;
+		xfer_size = RCAR_DMAC_MEMCPY_XFER_SIZE;
+		break;
+	}
+
+	desc->xfer_shift = ilog2(xfer_size);
+	desc->chcr = chcr | chcr_ts[desc->xfer_shift];
+}
+
+/*
+ * rcar_dmac_chan_prep_sg - prepare transfer descriptors from an SG list
+ *
+ * Common routine for public (MEMCPY) and slave DMA. The MEMCPY case is also
+ * converted to scatter-gather to guarantee consistent locking and a correct
+ * list manipulation. For slave DMA direction carries the usual meaning, and,
+ * logically, the SG list is RAM and the addr variable contains slave address,
+ * e.g., the FIFO I/O register. For MEMCPY direction equals DMA_MEM_TO_MEM
+ * and the SG list contains only one element and points at the source buffer.
+ */
+static struct dma_async_tx_descriptor *
+rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
+		       unsigned int sg_len, dma_addr_t dev_addr,
+		       enum dma_transfer_direction dir, unsigned long dma_flags,
+		       bool cyclic)
+{
+	struct rcar_dmac_hw_desc *hwdesc;
+	struct rcar_dmac_desc *desc;
+	struct scatterlist *sg = sgl;
+	size_t full_size = 0;
+	unsigned int i;
+
+	desc = rcar_dmac_desc_get(chan);
+	if (!desc)
+		return NULL;
+
+	desc->async_tx.flags = dma_flags;
+	desc->async_tx.cookie = -EBUSY;
+
+	desc->cyclic = cyclic;
+	desc->direction = dir;
+
+	rcar_dmac_chan_configure_desc(chan, desc);
+
+	/*
+	 * Allocate and fill the hardware descriptors. We own the only reference
+	 * to the DMA descriptor, there's no need for locking.
+	 */
+	for_each_sg(sgl, sg, sg_len, i) {
+		dma_addr_t mem_addr = sg_dma_address(sg);
+		size_t len = sg_dma_len(sg);
+
+		full_size += len;
+
+		while (len) {
+			size_t size = min_t(size_t, len, RCAR_DMAC_MAX_XFER_LEN);
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+			/*
+			 * Prevent individual transfers from crossing 4GB
+			 * boundaries.
+			 */
+			if (dev_addr >> 32 != (dev_addr + size - 1) >> 32)
+				size = ALIGN(dev_addr, 1ULL << 32) - dev_addr;
+			if (mem_addr >> 32 != (mem_addr + size - 1) >> 32)
+				size = ALIGN(mem_addr, 1ULL << 32) - mem_addr;
+#endif
+
+			hwdesc = rcar_dmac_hw_desc_get(chan);
+			if (!hwdesc) {
+				rcar_dmac_desc_put(chan, desc);
+				return NULL;
+			}
+
+			if (dir == DMA_DEV_TO_MEM) {
+				hwdesc->src_addr = dev_addr;
+				hwdesc->dst_addr = mem_addr;
+			} else {
+				hwdesc->src_addr = mem_addr;
+				hwdesc->dst_addr = dev_addr;
+			}
+
+			hwdesc->size = size;
+
+			dev_dbg(chan->chan.device->dev,
+				"chan%u: hwdesc %p/%p sgl %u@%p, %u/%u %pad -> %pad\n",
+				chan->index, hwdesc, desc, i, sg, size, len,
+				&hwdesc->src_addr, &hwdesc->dst_addr);
+
+			mem_addr += size;
+			if (dir == DMA_MEM_TO_MEM)
+				dev_addr += size;
+
+			len -= size;
+
+			list_add_tail(&hwdesc->node, &desc->hwdescs);
+		}
+	}
+
+	desc->size = full_size;
+
+	return &desc->async_tx;
+}
+
+/* -----------------------------------------------------------------------------
+ * DMA engine operations
+ */
+
+static int rcar_dmac_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	int ret;
+
+	INIT_LIST_HEAD(&rchan->desc.free);
+	INIT_LIST_HEAD(&rchan->desc.pending);
+	INIT_LIST_HEAD(&rchan->desc.active);
+	INIT_LIST_HEAD(&rchan->desc.done);
+	INIT_LIST_HEAD(&rchan->desc.wait);
+	INIT_LIST_HEAD(&rchan->desc.hw_free);
+	INIT_LIST_HEAD(&rchan->desc.pages);
+
+	/* Preallocate descriptors. */
+	ret = rcar_dmac_hw_desc_alloc(rchan);
+	if (ret < 0)
+		return -ENOMEM;
+
+	return rcar_dmac_desc_alloc(rchan);
+}
+
+static void rcar_dmac_free_chan_resources(struct dma_chan *chan)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	struct rcar_dmac *dmac = to_rcar_dmac(chan->device);
+	struct rcar_dmac_desc_page *page, *_page;
+
+	/* Protect against ISR */
+	spin_lock_irq(&rchan->lock);
+	rcar_dmac_chan_halt(rchan);
+	spin_unlock_irq(&rchan->lock);
+
+	/* Now no new interrupts will occur */
+
+	if (rchan->mid_rid >= 0) {
+		/* The caller is holding dma_list_mutex */
+		clear_bit(rchan->mid_rid, dmac->modules);
+		rchan->mid_rid = -EINVAL;
+	}
+
+	list_for_each_entry_safe(page, _page, &rchan->desc.pages, node) {
+		list_del(&page->node);
+		free_page((unsigned long)page);
+	}
+}
+
+static struct dma_async_tx_descriptor *
+rcar_dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
+			  dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	struct scatterlist sgl;
+
+	if (!len)
+		return NULL;
+
+	sg_init_table(&sgl, 1);
+	sg_set_page(&sgl, pfn_to_page(PFN_DOWN(dma_src)), len,
+		    offset_in_page(dma_src));
+	sg_dma_address(&sgl) = dma_src;
+	sg_dma_len(&sgl) = len;
+
+	return rcar_dmac_chan_prep_sg(rchan, &sgl, 1, dma_dest,
+				      DMA_MEM_TO_MEM, flags, false);
+}
+
+static struct dma_async_tx_descriptor *
+rcar_dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+			unsigned int sg_len, enum dma_transfer_direction dir,
+			unsigned long flags, void *context)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+
+	/* Someone calling slave DMA on a generic channel? */
+	if (rchan->mid_rid < 0 || !sg_len) {
+		dev_warn(chan->device->dev,
+			 "%s: bad parameter: len=%d, id=%d\n",
+			 __func__, sg_len, rchan->mid_rid);
+		return NULL;
+	}
+
+	return rcar_dmac_chan_prep_sg(rchan, sgl, sg_len, rchan->slave_addr,
+				      dir, flags, false);
+}
+
+#define RCAR_DMAC_MAX_SG_LEN	32
+
+static struct dma_async_tx_descriptor *
+rcar_dmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
+			  size_t buf_len, size_t period_len,
+			  enum dma_transfer_direction dir, unsigned long flags,
+			  void *context)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	struct dma_async_tx_descriptor *desc;
+	struct scatterlist *sgl;
+	unsigned int sg_len;
+	unsigned int i;
+
+	/* Someone calling slave DMA on a generic channel? */
+	if (rchan->mid_rid < 0 || buf_len < period_len) {
+		dev_warn(chan->device->dev,
+			"%s: bad parameter: buf_len=%zu, period_len=%zu, id=%d\n",
+			__func__, buf_len, period_len, rchan->mid_rid);
+		return NULL;
+	}
+
+	sg_len = buf_len / period_len;
+	if (sg_len > RCAR_DMAC_MAX_SG_LEN) {
+		dev_err(chan->device->dev,
+			"chan%u: sg length %d exceds limit %d",
+			rchan->index, sg_len, RCAR_DMAC_MAX_SG_LEN);
+		return NULL;
+	}
+
+	/*
+	 * Allocate the sg list dynamically as it would consumer too much stack
+	 * space.
+	 */
+	sgl = kcalloc(sg_len, sizeof(*sgl), GFP_KERNEL);
+	if (!sgl)
+		return NULL;
+
+	sg_init_table(sgl, sg_len);
+
+	for (i = 0; i < sg_len; ++i) {
+		dma_addr_t src = buf_addr + (period_len * i);
+
+		sg_set_page(&sgl[i], pfn_to_page(PFN_DOWN(src)), period_len,
+			    offset_in_page(src));
+		sg_dma_address(&sgl[i]) = src;
+		sg_dma_len(&sgl[i]) = period_len;
+	}
+
+	desc = rcar_dmac_chan_prep_sg(rchan, sgl, sg_len, rchan->slave_addr,
+				      dir, flags, true);
+
+	kfree(sgl);
+	return desc;
+}
+
+static void rcar_dmac_slave_config(struct rcar_dmac_chan *chan,
+				  struct dma_slave_config *cfg)
+{
+	/*
+	 * We could lock this, but you shouldn't be configuring the
+	 * channel, while using it...
+	 */
+
+	if (cfg->direction == DMA_DEV_TO_MEM) {
+		chan->slave_addr = cfg->src_addr;
+		chan->xfer_size = cfg->src_addr_width;
+	} else {
+		chan->slave_addr = cfg->dst_addr;
+		chan->xfer_size = cfg->dst_addr_width;
+	}
+}
+
+static int rcar_dmac_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			     unsigned long arg)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		rcar_dmac_chan_terminate_all(rchan);
+		break;
+
+	case DMA_SLAVE_CONFIG:
+		rcar_dmac_slave_config(rchan, (struct dma_slave_config *)arg);
+		break;
+
+	default:
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+static size_t rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
+					 dma_cookie_t cookie)
+{
+	struct rcar_dmac_desc *desc = chan->desc.running;
+	struct rcar_dmac_hw_desc *hwdesc;
+	size_t residue = 0;
+
+	if (!desc)
+		return 0;
+
+	/*
+	 * If the cookie doesn't correspond to the currently running transfer
+	 * then the descriptor hasn't been processed yet, and the residue is
+	 * equal to the full descriptor size.
+	 */
+	if (cookie != desc->async_tx.cookie)
+		return desc->size;
+
+	/* Compute the size of all chunks still to be transferred. */
+	list_for_each_entry_reverse(hwdesc, &desc->hwdescs, node) {
+		if (hwdesc == desc->running)
+			break;
+
+		residue += hwdesc->size;
+	}
+
+	/* Add the residue for the current chunk. */
+	residue += rcar_dmac_chan_read(chan, RCAR_DMATCR) << desc->xfer_shift;
+
+	return residue;
+}
+
+static enum dma_status rcar_dmac_tx_status(struct dma_chan *chan,
+					   dma_cookie_t cookie,
+					   struct dma_tx_state *txstate)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	enum dma_status status;
+	unsigned long flags;
+	size_t residue;
+
+	status = dma_cookie_status(chan, cookie, txstate);
+	if (status == DMA_COMPLETE || !txstate)
+		return status;
+
+	spin_lock_irqsave(&rchan->lock, flags);
+	residue = rcar_dmac_chan_get_residue(rchan, cookie);
+	spin_unlock_irqrestore(&rchan->lock, flags);
+
+	dma_set_residue(txstate, residue);
+
+	return status;
+}
+
+static void rcar_dmac_issue_pending(struct dma_chan *chan)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&rchan->lock, flags);
+
+	if (list_empty(&rchan->desc.pending))
+		goto done;
+
+	/* Append the pending list to the active list. */
+	list_splice_tail_init(&rchan->desc.pending, &rchan->desc.active);
+
+	/*
+	 * If no transfer is running pick the first descriptor from the active
+	 * list and start the transfer.
+	 */
+	if (!rchan->desc.running) {
+		struct rcar_dmac_desc *desc;
+
+		desc = list_first_entry(&rchan->desc.active,
+					struct rcar_dmac_desc, node);
+		rchan->desc.running = desc;
+
+		rcar_dmac_chan_start_xfer(rchan);
+	}
+
+done:
+	spin_unlock_irqrestore(&rchan->lock, flags);
+}
+
+static int rcar_dmac_slave_caps(struct dma_chan *chan,
+				struct dma_slave_caps *caps)
+{
+	memset(caps, 0, sizeof(*caps));
+
+	/*
+	 * The device supports all widths from 1 to 64 bytes. As the
+	 * dma_slave_buswidth enumeration is limited to 8 bytes, set the
+	 * numerical value directly.
+	 */
+	caps->src_addr_widths = 0x7f;
+	caps->dstn_addr_widths = 0x7f;
+	caps->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+	caps->cmd_terminate = true;
+	caps->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+	return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * IRQ handling
+ */
+
+static irqreturn_t rcar_dmac_isr_transfer_end(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc *desc = chan->desc.running;
+	struct rcar_dmac_hw_desc *hwdesc;
+	irqreturn_t ret = IRQ_WAKE_THREAD;
+
+	if (WARN_ON(!desc)) {
+		/*
+		 * This should never happen, there should always be
+		 * a running descriptor when a transfer ends. Warn and
+		 * return.
+		 */
+		return IRQ_NONE;
+	}
+
+	/*
+	 * If we haven't completed the last hardware descriptor simply move to
+	 * the next one. Only wake the IRQ thread if the transfer is cyclic.
+	 */
+	hwdesc = desc->running;
+	if (!list_is_last(&hwdesc->node, &desc->hwdescs)) {
+		desc->running = list_next_entry(hwdesc, node);
+		if (!desc->cyclic)
+			ret = IRQ_HANDLED;
+		goto done;
+	}
+
+	/*
+	 * We've completed the last hardware. If the transfer is cyclic, move
+	 * back to the first one.
+	 */
+	if (desc->cyclic) {
+		desc->running = list_first_entry(&desc->hwdescs,
+						 struct rcar_dmac_hw_desc,
+						 node);
+		goto done;
+	}
+
+	/* The descriptor is complete, move it to the done list. */
+	list_move_tail(&desc->node, &chan->desc.done);
+	chan->desc.submitted--;
+
+	/* Queue the next descriptor, if any. */
+	if (!list_empty(&chan->desc.active))
+		chan->desc.running = list_first_entry(&chan->desc.active,
+						      struct rcar_dmac_desc,
+						      node);
+	else
+		chan->desc.running = NULL;
+
+	/* Suspend the device if no descriptor is pending. */
+	if (!chan->desc.submitted)
+		pm_runtime_put(chan->chan.device->dev);
+
+done:
+	if (chan->desc.running)
+		rcar_dmac_chan_start_xfer(chan);
+
+	return ret;
+}
+
+static irqreturn_t rcar_dmac_isr_channel(int irq, void *dev)
+{
+	struct rcar_dmac_chan *chan = dev;
+	irqreturn_t ret = IRQ_NONE;
+	u32 chcr;
+
+	spin_lock(&chan->lock);
+
+	chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
+	rcar_dmac_chan_write(chan, RCAR_DMACHCR,
+			     chcr & ~(RCAR_DMACHCR_TE | RCAR_DMACHCR_DE));
+
+	if (chcr & RCAR_DMACHCR_TE)
+		ret |= rcar_dmac_isr_transfer_end(chan);
+
+	spin_unlock(&chan->lock);
+
+	return ret;
+}
+
+static irqreturn_t rcar_dmac_isr_channel_thread(int irq, void *dev)
+{
+	struct rcar_dmac_chan *chan = dev;
+	struct rcar_dmac_desc *desc;
+
+	spin_lock_irq(&chan->lock);
+
+	/* For cyclic transfers notify the user after every chunk. */
+	if (chan->desc.running && chan->desc.running->cyclic) {
+		dma_async_tx_callback callback;
+		void *callback_param;
+
+		desc = chan->desc.running;
+		callback = desc->async_tx.callback;
+		callback_param = desc->async_tx.callback_param;
+
+		if (callback) {
+			spin_unlock_irq(&chan->lock);
+			callback(callback_param);
+			spin_lock_irq(&chan->lock);
+		}
+	}
+
+	/*
+	 * Call the callback function for all descriptors on the done list and
+	 * move them to the ack wait list.
+	 */
+	while (!list_empty(&chan->desc.done)) {
+		desc = list_first_entry(&chan->desc.done, struct rcar_dmac_desc,
+					node);
+		dma_cookie_complete(&desc->async_tx);
+		list_del(&desc->node);
+
+		if (desc->async_tx.callback) {
+			spin_unlock_irq(&chan->lock);
+			/*
+			 * We own the only reference to this descriptor, we can
+			 * safely dereference it without holding the channel
+			 * lock.
+			 */
+			desc->async_tx.callback(desc->async_tx.callback_param);
+			spin_lock_irq(&chan->lock);
+		}
+
+		list_add_tail(&desc->node, &chan->desc.wait);
+	}
+
+	/* Recycle all acked descriptors. */
+	rcar_dmac_desc_recycle_acked(chan);
+
+	spin_unlock_irq(&chan->lock);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t rcar_dmac_isr_error(int irq, void *data)
+{
+	struct rcar_dmac *dmac = data;
+
+	if (!(rcar_dmac_read(dmac, RCAR_DMAOR) & RCAR_DMAOR_AE))
+		return IRQ_NONE;
+
+	/*
+	 * An unrecoverable error occured on an unknown channel. Halt the DMAC,
+	 * abort transfers on all channels, and reinitialize the DMAC.
+	 */
+	rcar_dmac_stop(dmac);
+	rcar_dmac_abort(dmac);
+	rcar_dmac_init(dmac);
+
+	return IRQ_HANDLED;
+}
+
+/* -----------------------------------------------------------------------------
+ * OF xlate and channel filter
+ */
+
+static bool rcar_dmac_chan_filter(struct dma_chan *chan, void *arg)
+{
+	struct rcar_dmac *dmac = to_rcar_dmac(chan->device);
+	unsigned int id = (unsigned int)arg;
+
+	/*
+	 * FIXME: Using a filter on OF platforms is a nonsense. The OF xlate
+	 * function knows from which device it wants to allocate a channel from,
+	 * and would be perfectly capable of selecting the channel it wants.
+	 * Forcing it to call dma_request_channel() and iterate through all
+	 * channels from all controllers is just pointless.
+	 */
+	if (chan->device->device_control != rcar_dmac_control)
+		return false;
+
+	return !test_and_set_bit(id, dmac->modules);
+}
+
+static struct dma_chan *rcar_dmac_of_xlate(struct of_phandle_args *dma_spec,
+					   struct of_dma *ofdma)
+{
+	struct rcar_dmac_chan *rchan;
+	struct dma_chan *chan;
+	dma_cap_mask_t mask;
+
+	if (dma_spec->args_count != 1)
+		return NULL;
+
+	/* Only slave DMA channels can be allocated via DT */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	chan = dma_request_channel(mask, rcar_dmac_chan_filter,
+				   (void *)dma_spec->args[0]);
+	if (!chan)
+		return NULL;
+
+	rchan = to_rcar_dmac_chan(chan);
+	rchan->mid_rid = dma_spec->args[0];
+
+	return chan;
+}
+
+/* -----------------------------------------------------------------------------
+ * Power management
+ */
+
+#if defined(CONFIG_PM_SLEEP) || defined(CONFIG_PM_RUNTIME)
+static int rcar_dmac_suspend(struct device *dev)
+{
+	return 0;
+}
+
+static int rcar_dmac_resume(struct device *dev)
+{
+	struct rcar_dmac *dmac = dev_get_drvdata(dev);
+
+	return rcar_dmac_init(dmac);
+}
+#endif
+
+static const struct dev_pm_ops rcar_dmac_pm = {
+	SET_SYSTEM_SLEEP_PM_OPS(rcar_dmac_suspend, rcar_dmac_resume)
+	SET_RUNTIME_PM_OPS(rcar_dmac_suspend, rcar_dmac_resume, NULL)
+};
+
+/* -----------------------------------------------------------------------------
+ * Probe and remove
+ */
+
+static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
+				struct rcar_dmac_chan *rchan,
+				unsigned int index)
+{
+	struct platform_device *pdev = to_platform_device(dmac->dev);
+	struct dma_chan *chan = &rchan->chan;
+	char irqname[5];
+	int irq;
+	int ret;
+
+	rchan->index = index;
+	rchan->iomem = dmac->iomem + RCAR_DMAC_CHAN_OFFSET(index);
+	rchan->mid_rid = -EINVAL;
+
+	spin_lock_init(&rchan->lock);
+	mutex_init(&rchan->power_lock);
+
+	/* Request the channel interrupt. */
+	sprintf(irqname, "ch%u", index);
+	irq = platform_get_irq_byname(pdev, irqname);
+	if (irq < 0) {
+		dev_err(dmac->dev, "no IRQ specified for channel %u\n", index);
+		return -ENODEV;
+	}
+
+	rchan->irqname = devm_kmalloc(dmac->dev,
+				      strlen(dev_name(dmac->dev)) + 4,
+				      GFP_KERNEL);
+	if (!rchan->irqname)
+		return -ENOMEM;
+	sprintf(rchan->irqname, "%s:%u", dev_name(dmac->dev), index);
+
+	ret = devm_request_threaded_irq(dmac->dev, irq, rcar_dmac_isr_channel,
+					rcar_dmac_isr_channel_thread, 0,
+					rchan->irqname, rchan);
+	if (ret) {
+		dev_err(dmac->dev, "failed to request IRQ %u (%d)\n", irq, ret);
+		return ret;
+	}
+
+	/*
+	 * Initialize the DMA engine channel and add it to the DMA engine
+	 * channels list.
+	 */
+	chan->device = &dmac->engine;
+	dma_cookie_init(chan);
+
+	list_add_tail(&chan->device_node, &dmac->engine.channels);
+
+	return 0;
+}
+
+static int rcar_dmac_parse_of(struct device *dev, struct rcar_dmac *dmac)
+{
+	struct device_node *np = dev->of_node;
+	int ret;
+
+	ret = of_property_read_u32(np, "dma-channels", &dmac->n_channels);
+	if (ret < 0) {
+		dev_err(dev, "unable to read dma-channels property\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int rcar_dmac_probe(struct platform_device *pdev)
+{
+	struct dma_device *engine;
+	struct rcar_dmac *dmac;
+	struct resource *mem;
+	unsigned int i;
+	int irq;
+	int ret;
+
+	dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
+	if (!dmac)
+		return -ENOMEM;
+
+	dmac->dev = &pdev->dev;
+	platform_set_drvdata(pdev, dmac);
+
+	ret = rcar_dmac_parse_of(&pdev->dev, dmac);
+	if (ret < 0)
+		return ret;
+
+	dmac->channels = devm_kcalloc(&pdev->dev, dmac->n_channels,
+				      sizeof(*dmac->channels), GFP_KERNEL);
+	if (!dmac->channels)
+		return -ENOMEM;
+
+	/* Request resources. */
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	dmac->iomem = devm_ioremap_resource(&pdev->dev, mem);
+	if (IS_ERR(dmac->iomem))
+		return PTR_ERR(dmac->iomem);
+
+	irq = platform_get_irq_byname(pdev, "error");
+	if (irq < 0) {
+		dev_err(&pdev->dev, "no error IRQ specified\n");
+		return -ENODEV;
+	}
+
+	dmac->irqname = devm_kmalloc(dmac->dev, strlen(dev_name(dmac->dev)) + 7,
+				     GFP_KERNEL);
+	if (!dmac->irqname)
+		return -ENOMEM;
+	sprintf(dmac->irqname, "%s:error", dev_name(&pdev->dev));
+
+	ret = devm_request_irq(&pdev->dev, irq, rcar_dmac_isr_error, 0,
+			       dmac->irqname, dmac);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to request IRQ %u (%d)\n",
+			irq, ret);
+		return ret;
+	}
+
+	/* Initialize the channels. */
+	INIT_LIST_HEAD(&dmac->engine.channels);
+
+	for (i = 0; i < dmac->n_channels; ++i) {
+		ret = rcar_dmac_chan_probe(dmac, &dmac->channels[i], i);
+		if (ret < 0)
+			return ret;
+	}
+
+	/* Enable runtime PM and initialize the device. */
+	pm_runtime_enable(&pdev->dev);
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
+		return ret;
+	}
+
+	ret = rcar_dmac_init(dmac);
+	pm_runtime_put(&pdev->dev);
+
+	if (ret) {
+		dev_err(&pdev->dev, "failed to reset device\n");
+		goto error;
+	}
+
+	/* Register the DMAC as a DMA provider for DT. */
+	ret = of_dma_controller_register(pdev->dev.of_node, rcar_dmac_of_xlate,
+					 NULL);
+	if (ret < 0)
+		goto error;
+
+	/*
+	 * Register the DMA engine device.
+	 *
+	 * Default transfer size of 32 bytes requires 32-byte alignment.
+	 */
+	engine = &dmac->engine;
+	dma_cap_set(DMA_MEMCPY, engine->cap_mask);
+	dma_cap_set(DMA_SLAVE, engine->cap_mask);
+
+	engine->dev = &pdev->dev;
+	engine->copy_align = ilog2(RCAR_DMAC_MEMCPY_XFER_SIZE);
+
+	engine->device_alloc_chan_resources = rcar_dmac_alloc_chan_resources;
+	engine->device_free_chan_resources = rcar_dmac_free_chan_resources;
+	engine->device_prep_dma_memcpy = rcar_dmac_prep_dma_memcpy;
+	engine->device_prep_slave_sg = rcar_dmac_prep_slave_sg;
+	engine->device_prep_dma_cyclic = rcar_dmac_prep_dma_cyclic;
+	engine->device_control = rcar_dmac_control;
+	engine->device_tx_status = rcar_dmac_tx_status;
+	engine->device_issue_pending = rcar_dmac_issue_pending;
+	engine->device_slave_caps = rcar_dmac_slave_caps;
+
+	ret = dma_async_device_register(engine);
+	if (ret < 0)
+		goto error;
+
+	return 0;
+
+error:
+	of_dma_controller_free(pdev->dev.of_node);
+	pm_runtime_disable(&pdev->dev);
+	return ret;
+}
+
+static int rcar_dmac_remove(struct platform_device *pdev)
+{
+	struct rcar_dmac *dmac = platform_get_drvdata(pdev);
+	unsigned int i;
+
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&dmac->engine);
+
+	pm_runtime_disable(&pdev->dev);
+
+	for (i = 0; i < dmac->n_channels; ++i)
+		mutex_destroy(&dmac->channels[i].power_lock);
+
+	return 0;
+}
+
+static void rcar_dmac_shutdown(struct platform_device *pdev)
+{
+	struct rcar_dmac *dmac = platform_get_drvdata(pdev);
+
+	rcar_dmac_stop(dmac);
+}
+
+static const struct of_device_id rcar_dmac_of_ids[] = {
+	{ .compatible = "renesas,rcar-dmac", },
+	{ /* Sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, rcar_dmac_of_ids);
+
+static struct platform_driver rcar_dmac_driver = {
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.pm	= &rcar_dmac_pm,
+		.name	= "rcar-dmac",
+		.of_match_table = rcar_dmac_of_ids,
+	},
+	.probe		= rcar_dmac_probe,
+	.remove		= rcar_dmac_remove,
+	.shutdown	= rcar_dmac_shutdown,
+};
+
+module_platform_driver(rcar_dmac_driver);
+
+MODULE_DESCRIPTION("R-Car Gen2 DMA Controller Driver");
+MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>");
+MODULE_LICENSE("GPL v2");