diff mbox

[1/2] dmaengine: add msm bam dma driver

Message ID 1382732643-8184-2-git-send-email-agross@codeaurora.org (mailing list archive)
State New, archived
Headers show

Commit Message

Andy Gross Oct. 25, 2013, 8:24 p.m. UTC
Add the DMA engine driver for the MSM Bus Access Manager (BAM) DMA controller
found in the MSM 8x74 platforms.

Each BAM DMA device is associated with a specific on-chip peripheral.  Each
channel provides a uni-directional data transfer engine that is capable of
transferring data between the peripheral and system memory (System mode) or
between two peripherals (BAM2BAM).

The initial release of this driver only supports slave transfers between
peripherals and system memory.

Signed-off-by: Andy Gross <agross@codeaurora.org>
---
 drivers/dma/Kconfig            |    9 +
 drivers/dma/Makefile           |    1 +
 drivers/dma/msm_bam_dma.c      |  840 ++++++++++++++++++++++++++++++++++++++++
 drivers/dma/msm_bam_dma_priv.h |  286 ++++++++++++++
 include/linux/msm_bam_dma.h    |   27 ++
 5 files changed, 1163 insertions(+)
 create mode 100644 drivers/dma/msm_bam_dma.c
 create mode 100644 drivers/dma/msm_bam_dma_priv.h
 create mode 100644 include/linux/msm_bam_dma.h

Comments

Stephen Boyd Oct. 29, 2013, 5:56 p.m. UTC | #1
On 10/25, Andy Gross wrote:
> diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
> index f238cfd..a71b415 100644
> --- a/drivers/dma/Kconfig
> +++ b/drivers/dma/Kconfig
> @@ -364,4 +364,13 @@ config DMATEST
>  	  Simple DMA test client. Say N unless you're debugging a
>  	  DMA Device driver.
>  
> +
> +config MSM_BAM_DMA
> +	tristate "MSM BAM DMA support"
> +	depends on ARCH_MSM

It would be nice if we didn't have to rely on ARCH_MSM here so we
get more build coverage.

> +	select DMA_ENGINE
> +	---help---
> +	  Enable support for the MSM BAM DMA controller.  This controller
> +	  provides DMA capabilities for a variety of on-chip devices.
> +
>  endif
> diff --git a/drivers/dma/msm_bam_dma.c b/drivers/dma/msm_bam_dma.c
> new file mode 100644
> index 0000000..d16bf94
> --- /dev/null
> +++ b/drivers/dma/msm_bam_dma.c
> @@ -0,0 +1,840 @@
> +/*
> + * MSM BAM DMA engine driver
> + *
> + * Copyright (c) 2013, The Linux Foundation. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/io.h>
> +#include <linux/init.h>
> +#include <linux/slab.h>
> +#include <linux/module.h>
> +#include <linux/interrupt.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/dmaengine.h>
> +#include <linux/interrupt.h>

interrupt.h is here twice

> +#include <linux/scatterlist.h>
> +#include <linux/device.h>
> +#include <linux/platform_device.h>
> +#include <linux/of.h>
> +#include <linux/of_address.h>
> +#include <linux/of_irq.h>
> +#include <linux/of_dma.h>
> +#include <linux/clk.h>
> +#include <linux/msm_bam_dma.h>
> +
> +#include "dmaengine.h"
> +#include "msm_bam_dma_priv.h"

Why do we need this file? Can't we just put the #defines in this
file?

> +
> +
> +/*

There should be two '*'s here for kernel-doc.

> + * bam_alloc_chan - Allocate channel resources for DMA channel.
> + * @chan: specified channel
> + *
> + * This function allocates the FIFO descriptor memory and resets the channel
> + */
> +static int bam_alloc_chan(struct dma_chan *chan)
> +{
> +	struct bam_chan *bchan = to_bam_chan(chan);
> +	struct bam_device *bdev = bchan->device;
> +	u32 val;
> +	union bam_pipe_ctrl pctrl;
> +
> +	/* check for channel activity */
> +	pctrl.value = ioread32(bdev->regs + BAM_P_CTRL(bchan->id));

I recall io{read,write}*() being used for PCI and other ioport
devices. If that's right then readl/writel would be more
appropriate, and the *_relaxed variants would be even better if
the correct memory barriers are also put in place.

> +	if (pctrl.bits.p_en) {
> +		dev_err(bdev->dev, "channel already active\n");
> +		return -EINVAL;
> +	}
> +
> +	/* allocate FIFO descriptor space */
> +	bchan->fifo_virt = (struct bam_desc_hw *)dma_alloc_coherent(bdev->dev,

There isn't any need to cast from void *.

> +				BAM_DESC_FIFO_SIZE, &bchan->fifo_phys,
> +				GFP_KERNEL);
> +
> +	if (!bchan->fifo_virt) {
> +		dev_err(bdev->dev, "Failed to allocate descriptor fifo\n");
> +		return -ENOMEM;
> +	}
> +
> +	/* reset channel */
> +	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
> +	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
> +
> +	/* configure fifo address/size in bam channel registers */
> +	iowrite32(bchan->fifo_phys, bdev->regs +
> +			BAM_P_DESC_FIFO_ADDR(bchan->id));
> +	iowrite32(BAM_DESC_FIFO_SIZE, bdev->regs +
> +			BAM_P_FIFO_SIZES(bchan->id));
> +
> +	/* unmask and enable interrupts for defined EE, bam and error irqs */
> +	iowrite32(BAM_IRQ_MSK, bdev->regs + BAM_IRQ_SRCS_EE(bchan->ee));
> +
> +	/* enable the per pipe interrupts, enable EOT and INT irqs */
> +	iowrite32(P_DEFAULT_IRQS_EN, bdev->regs + BAM_P_IRQ_EN(bchan->id));
> +
> +	/* unmask the specific pipe and EE combo */
> +	val = ioread32(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> +	val |= 1 << bchan->id;
> +	iowrite32(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> +
> +	/* set fixed direction and mode, then enable channel */
> +	pctrl.value = 0;
> +	pctrl.bits.p_direction =
> +		(bchan->bam_slave.slave.direction == DMA_DEV_TO_MEM) ?
> +			BAM_PIPE_PRODUCER : BAM_PIPE_CONSUMER;
> +	pctrl.bits.p_sys_mode = BAM_PIPE_MODE_SYSTEM;
> +	pctrl.bits.p_en = 1;
> +
> +	iowrite32(pctrl.value, bdev->regs + BAM_P_CTRL(bchan->id));
> +
> +	/* set desc threshold */

Is this a TODO?
`
> +	/* do bookkeeping for tracking used EEs, used during IRQ handling */
> +	set_bit(bchan->ee, &bdev->enabled_ees);
> +
> +	bchan->head = 0;
> +	bchan->tail = 0;
> +
> +	return 0;
> +}
> +
> +/*
> + * bam_free_chan - Frees dma resources associated with specific channel
> + * @chan: specified channel
> + *
> + * Free the allocated fifo descriptor memory and channel resources
> + *
> + */
> +static void bam_free_chan(struct dma_chan *chan)
> +{
> +	struct bam_chan *bchan = to_bam_chan(chan);
> +	struct bam_device *bdev = bchan->device;
> +	u32 val;
> +
> +	/* reset channel */
> +	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
> +	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
> +
> +	dma_free_coherent(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
> +				bchan->fifo_phys);
> +
> +	/* mask irq for pipe/channel */
> +	val = ioread32(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> +	val &= ~(1 << bchan->id);
> +	iowrite32(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> +
> +	/* disable irq */
> +	iowrite32(0, bdev->regs + BAM_P_IRQ_EN(bchan->id));
> +
> +	clear_bit(bchan->ee, &bdev->enabled_ees);
> +}
> +
> +/*
> + * bam_slave_config - set slave configuration for channel
> + * @chan: dma channel
> + * @cfg: slave configuration
> + *
> + * Sets slave configuration for channel
> + * Only allow setting direction once.  BAM channels are unidirectional
> + * and the direction is set in hardware.
> + *
> + */
> +static void bam_slave_config(struct bam_chan *bchan,
> +		struct bam_dma_slave_config *bcfg)
> +{
> +	struct bam_device *bdev = bchan->device;
> +
> +	bchan->bam_slave.desc_threshold = bcfg->desc_threshold;
> +
> +	/* set desc threshold */
> +	iowrite32(bcfg->desc_threshold, bdev->regs + BAM_DESC_CNT_TRSHLD);
> +}
> +
> +/*
> + * bam_start_dma - loads up descriptors and starts dma
> + * @chan: dma channel
> + *
> + * Loads descriptors into descriptor fifo and starts dma controller
> + *
> + * NOTE: Must hold channel lock
> +*/
> +static void bam_start_dma(struct bam_chan *bchan)
> +{
> +	struct bam_device *bdev = bchan->device;
> +	struct bam_async_desc *async_desc, *_adesc;
> +	u32 curr_len, val;
> +	u32 num_processed = 0;
> +
> +	if (list_empty(&bchan->pending))
> +		return;
> +
> +	curr_len = (bchan->head <= bchan->tail) ?
> +			bchan->tail - bchan->head :
> +			MAX_DESCRIPTORS - bchan->head + bchan->tail;
> +
> +	list_for_each_entry_safe(async_desc, _adesc, &bchan->pending, node) {
> +
> +		/* bust out if we are out of room */
> +		if (async_desc->num_desc + curr_len > MAX_DESCRIPTORS)
> +			break;
> +
> +		/* copy descriptors into fifo */
> +		if (bchan->tail + async_desc->num_desc > MAX_DESCRIPTORS) {
> +			u32 partial = MAX_DESCRIPTORS - bchan->tail;
> +
> +			memcpy(&bchan->fifo_virt[bchan->tail], async_desc->desc,
> +				partial * sizeof(struct bam_desc_hw));
> +			memcpy(bchan->fifo_virt, &async_desc->desc[partial],
> +				(async_desc->num_desc - partial) *
> +					sizeof(struct bam_desc_hw));
> +		} else {
> +			memcpy(&bchan->fifo_virt[bchan->tail], async_desc->desc,
> +				async_desc->num_desc *
> +				sizeof(struct bam_desc_hw));
> +		}
> +
> +		num_processed++;
> +		bchan->tail += async_desc->num_desc;
> +		bchan->tail %= MAX_DESCRIPTORS;
> +		curr_len += async_desc->num_desc;

I wonder if you could use the circ_buf API here.

> +
> +		list_move_tail(&async_desc->node, &bchan->active);
> +	}
> +
> +	/* bail if we didn't queue anything to the active queue */
> +	if (!num_processed)
> +		return;
> +
> +	async_desc = list_first_entry(&bchan->active, struct bam_async_desc,
> +			node);
> +
> +	val = ioread32(bdev->regs + BAM_P_SW_OFSTS(bchan->id));
> +	val &= P_SW_OFSTS_MASK;
> +
> +	/* kick off dma by forcing a write event to the pipe */
> +	iowrite32((bchan->tail * sizeof(struct bam_desc_hw)),
> +			bdev->regs + BAM_P_EVNT_REG(bchan->id));
> +}
> +
> +/*
> + * bam_tx_submit - Adds transaction to channel pending queue
> + * @tx: transaction to queue
> + *
> + * Adds dma transaction to pending queue for channel
> + *
> +*/
> +static dma_cookie_t bam_tx_submit(struct dma_async_tx_descriptor *tx)
> +{
> +	struct bam_chan *bchan = to_bam_chan(tx->chan);
> +	struct bam_async_desc *desc = to_bam_async_desc(tx);
> +	dma_cookie_t cookie;
> +
> +	spin_lock_bh(&bchan->lock);
> +
> +	cookie = dma_cookie_assign(tx);
> +	list_add_tail(&desc->node, &bchan->pending);
> +
> +	spin_unlock_bh(&bchan->lock);
> +
> +	return cookie;
> +}
> +
> +/*
> + * bam_prep_slave_sg - Prep slave sg transaction
> + *
> + * @chan: dma channel
> + * @sgl: scatter gather list
> + * @sg_len: length of sg
> + * @direction: DMA transfer direction
> + * @flags: DMA flags
> + * @context: transfer context (unused)
> + */
> +static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan,
> +	struct scatterlist *sgl, unsigned int sg_len,
> +	enum dma_transfer_direction direction, unsigned long flags,
> +	void *context)
> +{
> +	struct bam_chan *bchan = to_bam_chan(chan);
> +	struct bam_device *bdev = bchan->device;
> +	struct bam_async_desc *async_desc = NULL;

Useless assignment? Just return NULL instead of the next 3 goto's and
this can be avoided.

> +	struct scatterlist *sg;
> +	u32 i;
> +	struct bam_desc_hw *desc;
> +
> +
> +	if (!is_slave_direction(direction)) {
> +		dev_err(bdev->dev, "invalid dma direction\n");
> +		goto err_out;
> +	}

I'm surprised the core framework doesn't do this.

> +
> +	/* direction has to match pipe configuration from the slave config */
> +	if (direction != bchan->bam_slave.slave.direction) {
> +		dev_err(bdev->dev,
> +				"trans does not match channel configuration\n");

s/trans/transfer/ ?

> +		goto err_out;
> +	}
> +
> +	/* make sure number of descriptors will fit within the fifo */
> +	if (sg_len > MAX_DESCRIPTORS) {
> +		dev_err(bdev->dev, "not enough fifo descriptor space\n");
> +		goto err_out;
> +	}
> +
> +	/* allocate enough room to accomodate the number of entries */
> +	async_desc = kzalloc(sizeof(*async_desc) +
> +			(sg_len * sizeof(struct bam_desc_hw)), GFP_KERNEL);
> +
> +	if (!async_desc) {
> +		dev_err(bdev->dev, "failed to allocate async descriptor\n");
> +		goto err_out;
> +	}
> +
> +	async_desc->num_desc = sg_len;
> +	async_desc->dir = (direction == DMA_DEV_TO_MEM) ? BAM_PIPE_PRODUCER :
> +				BAM_PIPE_CONSUMER;
> +
> +	/* fill in descriptors, align hw descriptor to 8 bytes */
> +	desc = async_desc->desc;
> +	for_each_sg(sgl, sg, sg_len, i) {
> +		if (sg_dma_len(sg) > BAM_MAX_DATA_SIZE) {
> +			dev_err(bdev->dev, "segment exceeds max size\n");
> +			goto err_out;
> +		}
> +
> +		desc->addr = sg_dma_address(sg);
> +		desc->size = sg_dma_len(sg);
> +		desc++;
> +	}
> +
> +	/* set EOT flag on last descriptor, we want IRQ on completion */
> +	async_desc->desc[async_desc->num_desc-1].flags |= DESC_FLAG_EOT;

Please add space between num_desc, dash, and 1.

> +
> +	dma_async_tx_descriptor_init(&async_desc->txd, chan);
> +	async_desc->txd.tx_submit = bam_tx_submit;
> +
> +	return &async_desc->txd;
> +
> +err_out:
> +	kfree(async_desc);
> +	return NULL;
> +}
> +
> +/*
> + * bam_dma_terminate_all - terminate all transactions
> + * @chan: dma channel
> + *
> + * Idles channel and dequeues and frees all transactions
> + * No callbacks are done
> + *
> +*/
> +static void bam_dma_terminate_all(struct dma_chan *chan)
> +{
> +	struct bam_chan *bchan = to_bam_chan(chan);
> +	struct bam_device *bdev = bchan->device;
> +	LIST_HEAD(desc_cleanup);
> +	struct bam_async_desc *desc, *_desc;
> +
> +	spin_lock_bh(&bchan->lock);
> +
> +	/* reset channel */
> +	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
> +	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
> +
> +	/* grab all the descriptors and free them */
> +	list_splice_tail_init(&bchan->pending, &desc_cleanup);
> +	list_splice_tail_init(&bchan->active, &desc_cleanup);
> +
> +	list_for_each_entry_safe(desc, _desc, &desc_cleanup, node)
> +		kfree(desc);
> +
> +	spin_unlock_bh(&bchan->lock);
> +}
> +
> +/*
> + * bam_control - DMA device control
> + * @chan: dma channel
> + * @cmd: control cmd
> + * @arg: cmd argument
> + *
> + * Perform DMA control command
> + *
> +*/
> +static int bam_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
> +	unsigned long arg)
> +{
> +	struct bam_chan *bchan = to_bam_chan(chan);
> +	struct bam_device *bdev = bchan->device;
> +	struct bam_dma_slave_config *bconfig;
> +	int ret = 0;
> +
> +	switch (cmd) {
> +	case DMA_PAUSE:
> +		spin_lock_bh(&bchan->lock);
> +		iowrite32(1, bdev->regs + BAM_P_HALT(bchan->id));
> +		spin_unlock_bh(&bchan->lock);
> +		break;
> +	case DMA_RESUME:
> +		spin_lock_bh(&bchan->lock);
> +		iowrite32(0, bdev->regs + BAM_P_HALT(bchan->id));
> +		spin_unlock_bh(&bchan->lock);
> +		break;
> +	case DMA_TERMINATE_ALL:
> +		bam_dma_terminate_all(chan);
> +		break;
> +	case DMA_SLAVE_CONFIG:
> +		bconfig = (struct bam_dma_slave_config *)arg;
> +		bam_slave_config(bchan, bconfig);
> +		break;
> +	default:
> +		ret = -EIO;
> +		break;
> +	}
> +
> +	return ret;
> +}
> +
> +/*
> + * process_irqs_per_ee - processes the interrupts for a specific ee
> + * @bam: bam controller
> + * @ee: execution environment
> + *
> + * This function processes the interrupts for a given execution environment
> + *
> + */
> +static u32 process_irqs_per_ee(struct bam_device *bdev,
> +	u32 ee)
> +{
> +	u32 i, srcs, stts, pipe_stts;
> +	u32 clr_mask = 0;
> +
> +
> +	srcs = ioread32(bdev->regs + BAM_IRQ_SRCS_EE(ee));
> +
> +	/* check for general bam error */
> +	if (srcs & BAM_IRQ) {
> +		stts = ioread32(bdev->regs + BAM_IRQ_STTS);
> +		clr_mask |= stts;
> +	}
> +
> +	/* check pipes / channels */
> +	if (srcs & P_IRQ) {
> +
> +		for (i = 0; i < bdev->num_channels; i++) {
> +			if (srcs & (1 << i)) {
> +				/* clear pipe irq */
> +				pipe_stts = ioread32(bdev->regs +
> +					BAM_P_IRQ_STTS(i));
> +
> +				iowrite32(pipe_stts, bdev->regs +
> +					BAM_P_IRQ_CLR(i));
> +
> +				/* schedule channel work */
> +				tasklet_schedule(&bdev->channels[i].tasklet);

Maybe this little hunk inside the for loop deserves another
function because we're pretty deeply nested here. Or invert the
logic so that if (!(srcs & P_IRQ)) returns clr_mask and then this
can be less indented.

> +			}
> +		}
> +	}
> +
> +	return clr_mask;
> +}
> +
> +/*
> + * bam_dma_irq - irq handler for bam controller
> + * @irq: IRQ of interrupt
> + * @data: callback data
> + *
> + * IRQ handler for the bam controller
> + */
> +static irqreturn_t bam_dma_irq(int irq, void *data)
> +{
> +	struct bam_device *bdev = (struct bam_device *)data;

Unnecessary cast.

> +	u32 clr_mask = 0;
> +	u32 i;
> +
> +
> +	for (i = 0; i < bdev->num_ees; i++) {
> +		if (test_bit(i, &bdev->enabled_ees))
> +			clr_mask |= process_irqs_per_ee(bdev, i);
> +	}

These braces aren't really necessary.

> +
> +	iowrite32(clr_mask, bdev->regs + BAM_IRQ_CLR);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +/*
> + * bam_tx_status - returns status of transaction
> + * @chan: dma channel
> + * @cookie: transaction cookie
> + * @txstate: DMA transaction state
> + *
> + * Return status of dma transaction
> + */
> +static enum dma_status bam_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
> +		struct dma_tx_state *txstate)
> +{
> +	return dma_cookie_status(chan, cookie, txstate);
> +}

Do we actually need this function? Can't we just assign
dma_cookie_status to device_tx_status below?

> +
> +/*
> + * dma_tasklet - DMA IRQ tasklet
> + * @data: tasklet argument (bam controller structure)
> + *
> + * Sets up next DMA operation and then processes all completed transactions
> + */
> +static void dma_tasklet(unsigned long data)
> +{
> +	struct bam_chan *bchan = (struct bam_chan *)data;
> +	struct bam_async_desc *desc, *_desc;
> +	LIST_HEAD(desc_cleanup);
> +	u32 fifo_length;
> +
> +
> +	spin_lock_bh(&bchan->lock);
> +
> +	if (list_empty(&bchan->active))
> +		goto out;
> +
> +	fifo_length = (bchan->head <= bchan->tail) ?
> +		bchan->tail - bchan->head :
> +		MAX_DESCRIPTORS - bchan->head + bchan->tail;

This is fairly complicated. Can't we just write it like this?

	if (bchan->head <= bchan->tail)
		fifo_length = bchan->tail - bchan->head;
	else
		fifo_length = MAX_DESCRIPTORS - bchan->head + bchan->tail;

> +
> +	/* only process those which are currently done */
> +	list_for_each_entry_safe(desc, _desc, &bchan->active, node) {
> +		if (desc->num_desc > fifo_length)
> +			break;
> +
> +		dma_cookie_complete(&desc->txd);
> +
> +		list_move_tail(&desc->node, &desc_cleanup);
> +		fifo_length -= desc->num_desc;
> +		bchan->head += desc->num_desc;
> +		bchan->head %= MAX_DESCRIPTORS;
> +	}
> +
> +out:
> +	/* kick off processing of any queued descriptors */
> +	bam_start_dma(bchan);
> +
> +	spin_unlock_bh(&bchan->lock);
> +
> +	/* process completed descriptors */
> +	list_for_each_entry_safe(desc, _desc, &desc_cleanup, node) {
> +		if (desc->txd.callback)
> +			desc->txd.callback(desc->txd.callback_param);
> +
> +		kfree(desc);
> +	}
> +}
> +
> +/*
> + * bam_issue_pending - starts pending transactions
> + * @chan: dma channel
> + *
> + * Calls tasklet directly which in turn starts any pending transactions
> + */
> +static void bam_issue_pending(struct dma_chan *chan)
> +{
> +	dma_tasklet((unsigned long)chan);
> +}
> +
> +struct bam_filter_args {
> +	struct dma_device *dev;
> +	u32 id;
> +	u32 ee;
> +	u32 dir;
> +};
> +
> +static bool bam_dma_filter(struct dma_chan *chan, void *data)
> +{
> +	struct bam_filter_args *args = data;
> +	struct bam_chan *bchan = to_bam_chan(chan);
> +
> +	if (args->dev == chan->device &&
> +		args->id == bchan->id) {
> +
> +		/* we found the channel, so lets set the EE and dir */
> +		bchan->ee = args->ee;
> +		bchan->bam_slave.slave.direction = args->dir ?
> +				DMA_DEV_TO_MEM : DMA_MEM_TO_DEV;
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
> +static struct dma_chan *bam_dma_xlate(struct of_phandle_args *dma_spec,
> +		struct of_dma *of)
> +{
> +	struct bam_filter_args args;
> +	dma_cap_mask_t cap;
> +
> +	if (dma_spec->args_count != 3)
> +		return NULL;
> +
> +	args.dev = of->of_dma_data;
> +	args.id = dma_spec->args[0];
> +	args.ee = dma_spec->args[1];
> +	args.dir = dma_spec->args[2];
> +
> +	dma_cap_zero(cap);
> +	dma_cap_set(DMA_SLAVE, cap);
> +
> +	return dma_request_channel(cap, bam_dma_filter, &args);
> +}
> +
> +/*
> + * bam_init
> + * @bdev: bam device
> + *
> + * Initialization helper for global bam registers
> + */
> +static int bam_init(struct bam_device *bdev)
> +{
> +	union bam_num_pipes num_pipes;
> +	union bam_ctrl ctrl;
> +	union bam_cnfg_bits cnfg_bits;
> +	union bam_revision revision;
> +
> +	/* read versioning information */
> +	revision.value = ioread32(bdev->regs + BAM_REVISION);
> +	bdev->num_ees = revision.bits.num_ees;
> +
> +	num_pipes.value = ioread32(bdev->regs + BAM_NUM_PIPES);
> +	bdev->num_channels = num_pipes.bits.bam_num_pipes;
> +
> +	/* s/w reset bam */
> +	/* after reset all pipes are disabled and idle */
> +	ctrl.value = ioread32(bdev->regs + BAM_CTRL);
> +	ctrl.bits.bam_sw_rst = 1;
> +	iowrite32(ctrl.value, bdev->regs + BAM_CTRL);
> +	ctrl.bits.bam_sw_rst = 0;
> +	iowrite32(ctrl.value, bdev->regs + BAM_CTRL);
> +
> +	/* enable bam */
> +	ctrl.bits.bam_en = 1;
> +	iowrite32(ctrl.value, bdev->regs + BAM_CTRL);
> +
> +	/* set descriptor threshhold, start with 4 bytes */
> +	iowrite32(DEFAULT_CNT_THRSHLD, bdev->regs + BAM_DESC_CNT_TRSHLD);
> +
> +	/* set config bits for h/w workarounds */
> +	/* Enable all workarounds except BAM_FULL_PIPE */
> +	cnfg_bits.value = 0xffffffff;
> +	cnfg_bits.bits.obsolete = 0;
> +	cnfg_bits.bits.obsolete2 = 0;
> +	cnfg_bits.bits.reserved = 0;
> +	cnfg_bits.bits.reserved2 = 0;
> +	cnfg_bits.bits.bam_full_pipe = 0;
> +	iowrite32(cnfg_bits.value, bdev->regs + BAM_CNFG_BITS);
> +
> +	/* enable irqs for errors */
> +	iowrite32(BAM_ERROR_EN | BAM_HRESP_ERR_EN, bdev->regs + BAM_IRQ_EN);
> +	return 0;
> +}
> +
> +static void bam_channel_init(struct bam_device *bdev, struct bam_chan *bchan,
> +	u32 index)
> +{
> +	bchan->id = index;
> +	bchan->common.device = &bdev->common;
> +	bchan->device = bdev;
> +	spin_lock_init(&bchan->lock);
> +
> +	INIT_LIST_HEAD(&bchan->pending);
> +	INIT_LIST_HEAD(&bchan->active);
> +
> +	dma_cookie_init(&bchan->common);
> +	list_add_tail(&bchan->common.device_node,
> +		&bdev->common.channels);
> +
> +	tasklet_init(&bchan->tasklet, dma_tasklet, (unsigned long)bchan);
> +
> +	/* reset channel - just to be sure */
> +	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
> +	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
> +}
> +
> +static int bam_dma_probe(struct platform_device *pdev)
> +{
> +	struct bam_device *bdev;
> +	int err, i;
> +
> +	bdev = kzalloc(sizeof(*bdev), GFP_KERNEL);

Please use devm_kzalloc() here to simplify error paths.

> +	if (!bdev) {
> +		dev_err(&pdev->dev, "insufficient memory for private data\n");

kmalloc calls already print errors when they fail, so this can be
removed.

> +		err = -ENOMEM;
> +		goto err_no_bdev;

Just return the error here.

> +	}
> +
> +	bdev->dev = &pdev->dev;
> +	dev_set_drvdata(bdev->dev, bdev);
> +
> +	bdev->regs = of_iomap(pdev->dev.of_node, 0);
> +	if (!bdev->regs) {
> +		dev_err(bdev->dev, "unable to ioremap base\n");
> +		err = -ENOMEM;
> +		goto err_free_bamdev;
> +	}
> +
> +	bdev->irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
> +	if (bdev->irq == NO_IRQ) {
> +		dev_err(bdev->dev, "unable to map irq\n");
> +		err = -EINVAL;
> +		goto err_unmap_mem;
> +	}

Please use platform_* APIs here, this is a platform driver after
all. Notably, use platform_get_irq() and platform_get_resource()
followed by devm_ioremap_resource().

> +
> +	bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk");
> +	if (IS_ERR(bdev->bamclk)) {
> +		err = PTR_ERR(bdev->bamclk);
> +		goto err_free_irq;
> +	}
> +
> +	err = clk_prepare_enable(bdev->bamclk);
> +	if (err) {
> +		dev_err(bdev->dev, "failed to prepare/enable clock");
> +		goto err_free_irq;
> +	}
> +
> +	err = request_irq(bdev->irq, &bam_dma_irq, IRQF_TRIGGER_HIGH, "bam_dma",
> +				bdev);

Can be devm_request_irq(). Shouldn't this be done much later in
the probe? It looks like bdev isn't fully setup yet.

> +	if (err) {
> +		dev_err(bdev->dev, "error requesting irq\n");
> +		err = -EINVAL;
> +		goto err_disable_clk;
> +	}
> +
> +	if (bam_init(bdev)) {
> +		dev_err(bdev->dev, "cannot initialize bam device\n");
> +		err = -EINVAL;
> +		goto err_disable_clk;
> +	}
> +
> +	bdev->channels = kzalloc(sizeof(*bdev->channels) * bdev->num_channels,
> +				GFP_KERNEL);
> +

We're going to have devm_kcalloc() in 3.13 so you should be able
to use that here.

> +	if (!bdev->channels) {
> +		dev_err(bdev->dev, "unable to allocate channels\n");
> +		err = -ENOMEM;
> +		goto err_disable_clk;
> +	}
> +
> +	/* allocate and initialize channels */
> +	INIT_LIST_HEAD(&bdev->common.channels);
> +
> +	for (i = 0; i < bdev->num_channels; i++)
> +		bam_channel_init(bdev, &bdev->channels[i], i);
> +
> +	/* set max dma segment size */
> +	bdev->common.dev = bdev->dev;
> +	bdev->common.dev->dma_parms = &bdev->dma_parms;
> +	if (dma_set_max_seg_size(bdev->common.dev, BAM_MAX_DATA_SIZE)) {
> +		dev_err(bdev->dev, "cannot set maximum segment size\n");
> +		goto err_disable_clk;
> +	}
> +
> +	/* set capabilities */
> +	dma_cap_zero(bdev->common.cap_mask);
> +	dma_cap_set(DMA_SLAVE, bdev->common.cap_mask);
> +
> +	/* initialize dmaengine apis */
> +	bdev->common.device_alloc_chan_resources = bam_alloc_chan;
> +	bdev->common.device_free_chan_resources = bam_free_chan;
> +	bdev->common.device_prep_slave_sg = bam_prep_slave_sg;
> +	bdev->common.device_control = bam_control;
> +	bdev->common.device_issue_pending = bam_issue_pending;
> +	bdev->common.device_tx_status = bam_tx_status;
> +	bdev->common.dev = bdev->dev;
> +
> +	dma_async_device_register(&bdev->common);

This can fail. Please check error codes.

> +
> +	if (pdev->dev.of_node) {
> +		err = of_dma_controller_register(pdev->dev.of_node,
> +				bam_dma_xlate, &bdev->common);
> +
> +		if (err) {
> +			dev_err(bdev->dev, "failed to register of_dma\n");
> +			goto err_unregister_dma;
> +		}
> +	}
> +
> +	return 0;
> +
> +err_unregister_dma:
> +	dma_async_device_unregister(&bdev->common);
> +err_free_irq:
> +	free_irq(bdev->irq, bdev);
> +err_disable_clk:
> +	clk_disable_unprepare(bdev->bamclk);
> +err_unmap_mem:
> +	iounmap(bdev->regs);
> +err_free_bamdev:
> +	if (bdev)
> +		kfree(bdev->channels);
> +	kfree(bdev);
> +err_no_bdev:
> +	return err;
> +}
> +
> +static int bam_dma_remove(struct platform_device *pdev)
> +{
> +	struct bam_device *bdev;
> +
> +	bdev = dev_get_drvdata(&pdev->dev);
> +	dev_set_drvdata(&pdev->dev, NULL);

This is unnecessary.

> +
> +	dma_async_device_unregister(&bdev->common);
> +
> +	if (bdev) {

Ouch. We just dereferenced bdev one line before so it seems that
this check is unnecessary.

> +		free_irq(bdev->irq, bdev);
> +		clk_disable_unprepare(bdev->bamclk);
> +		iounmap(bdev->regs);
> +		kfree(bdev->channels);
> +	}
> +
> +	kfree(bdev);
> +	return 0;
> +}
[...]
> +
> +static int __init bam_dma_init(void)
> +{
> +	return platform_driver_register(&bam_dma_driver);
> +}
> +
> +static void __exit bam_dma_exit(void)
> +{
> +	return platform_driver_unregister(&bam_dma_driver);
> +}
> +
> +arch_initcall(bam_dma_init);
> +module_exit(bam_dma_exit);

module_platform_driver()? Or is there some probe deferral problem
that isn't resolved?

> +
> +MODULE_AUTHOR("Andy Gross <agross@codeaurora.org>");
> +MODULE_DESCRIPTION("MSM BAM DMA engine driver");
> +MODULE_LICENSE("GPL v2");
> diff --git a/drivers/dma/msm_bam_dma_priv.h b/drivers/dma/msm_bam_dma_priv.h
> new file mode 100644
> index 0000000..4d6a10c7
> --- /dev/null
> +++ b/drivers/dma/msm_bam_dma_priv.h
> @@ -0,0 +1,286 @@
> +/*
> + * Copyright (c) 2013, The Linux Foundation. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +#ifndef __MSM_BAM_DMA_PRIV_H__
> +#define __MSM_BAM_DMA_PRIV_H__
> +
> +#include <linux/dmaengine.h>
> +
> +enum bam_channel_mode {
> +	BAM_PIPE_MODE_BAM2BAM = 0,	/* BAM to BAM aka device to device */
> +	BAM_PIPE_MODE_SYSTEM,		/* BAM to/from System Memory */
> +};
> +
> +enum bam_channel_dir {
> +	BAM_PIPE_CONSUMER = 0,	/* channel reads from data-fifo or memory */
> +	BAM_PIPE_PRODUCER,	/* channel writes to data-fifo or memory */
> +};
> +
> +struct bam_desc_hw {
> +	u32 addr;		/* Buffer physical address */
> +	u32 size:16;		/* Buffer size in bytes */
> +	u32 flags:16;
> +};

Is this an in memory structure that the hardware reads? It should
probably use the correct types (i.e. u16 instead of bit fields)
and then be marked as __packed__ so that compile doesn't mess
up the alignment.

> +
> +#define DESC_FLAG_INT	(1<<15)
> +#define DESC_FLAG_EOT	(1<<14)
> +#define DESC_FLAG_EOB	(1<<13)

Space around << here please. Or use the BIT() macro.

> +
> +struct bam_async_desc {
> +	struct list_head node;
> +	struct dma_async_tx_descriptor txd;
> +	u32 num_desc;
> +	enum bam_channel_dir dir;
> +	u32 fifo_pos;
> +	struct bam_desc_hw desc[0];
> +};
> +
> +static inline struct bam_async_desc *to_bam_async_desc(
> +		struct dma_async_tx_descriptor *txd)
> +{
> +	return container_of(txd, struct bam_async_desc, txd);
> +}
> +
> +
> +#define BAM_CTRL			0x0000
> +#define BAM_REVISION			0x0004
> +#define BAM_SW_REVISION			0x0080
> +#define BAM_NUM_PIPES			0x003C
> +#define BAM_TIMER			0x0040
> +#define BAM_TIMER_CTRL			0x0044
> +#define BAM_DESC_CNT_TRSHLD		0x0008
> +#define BAM_IRQ_SRCS			0x000C
> +#define BAM_IRQ_SRCS_MSK		0x0010
> +#define BAM_IRQ_SRCS_UNMASKED		0x0030
> +#define BAM_IRQ_STTS			0x0014
> +#define BAM_IRQ_CLR			0x0018
> +#define BAM_IRQ_EN			0x001C
> +#define BAM_CNFG_BITS			0x007C
> +#define BAM_IRQ_SRCS_EE(x)		(0x0800 + ((x) * 0x80))
> +#define BAM_IRQ_SRCS_MSK_EE(x)		(0x0804 + ((x) * 0x80))
> +#define BAM_P_CTRL(x)			(0x1000 + ((x) * 0x1000))
> +#define BAM_P_RST(x)			(0x1004 + ((x) * 0x1000))
> +#define BAM_P_HALT(x)			(0x1008 + ((x) * 0x1000))
> +#define BAM_P_IRQ_STTS(x)		(0x1010 + ((x) * 0x1000))
> +#define BAM_P_IRQ_CLR(x)		(0x1014 + ((x) * 0x1000))
> +#define BAM_P_IRQ_EN(x)			(0x1018 + ((x) * 0x1000))
> +#define BAM_P_EVNT_DEST_ADDR(x)		(0x182C + ((x) * 0x1000))
> +#define BAM_P_EVNT_REG(x)		(0x1818 + ((x) * 0x1000))
> +#define BAM_P_SW_OFSTS(x)		(0x1800 + ((x) * 0x1000))
> +#define BAM_P_DATA_FIFO_ADDR(x)		(0x1824 + ((x) * 0x1000))
> +#define BAM_P_DESC_FIFO_ADDR(x)		(0x181C + ((x) * 0x1000))
> +#define BAM_P_EVNT_TRSHLD(x)		(0x1828 + ((x) * 0x1000))
> +#define BAM_P_FIFO_SIZES(x)		(0x1820 + ((x) * 0x1000))

If you have the columns it might be nice to s/x/pipe/ so that it's
clear what 'x' is.

> +
> +union bam_ctrl {
> +	u32 value;
> +	struct {
> +		u32 bam_sw_rst:1;
> +		u32 bam_en:1;
> +		u32 reserved3:2;
> +		u32 bam_en_accum:1;
> +		u32 testbus_sel:7;
> +		u32 reserved2:1;
> +		u32 bam_desc_cache_sel:2;
> +		u32 bam_cached_desc_store:1;
> +		u32 ibc_disable:1;
> +		u32 reserved1:15;
> +	} bits;
> +};
> +
> +union bam_revision {
> +	u32 value;
> +	struct {
> +		u32 revision:8;
> +		u32 num_ees:4;
> +		u32 reserved1:1;
> +		u32 ce_buffer_size:1;
> +		u32 axi_active:1;
> +		u32 use_vmidmt:1;
> +		u32 secured:1;
> +		u32 bam_has_no_bypass:1;
> +		u32 high_frequency_bam:1;
> +		u32 inactiv_tmrs_exst:1;
> +		u32 num_inactiv_tmrs:1;
> +		u32 desc_cache_depth:2;
> +		u32 cmd_desc_en:1;
> +		u32 inactiv_tmr_base:8;
> +	} bits;
> +};
> +
> +union bam_sw_revision {
> +	u32 value;
> +	struct {
> +		u32 step:16;
> +		u32 minor:12;
> +		u32 major:4;
> +	} bits;
> +};
> +
> +union bam_num_pipes {
> +	u32 value;
> +	struct {
> +		u32 bam_num_pipes:8;
> +		u32 reserved:8;
> +		u32 periph_non_pipe_grp:8;
> +		u32 bam_non_pipe_grp:8;
> +	} bits;
> +};
> +
> +union bam_irq_srcs_msk {
> +	u32 value;
> +	struct {
> +		u32 p_irq_msk:31;
> +		u32 bam_irq_msk:1;
> +	} bits;
> +};
> +
> +union bam_cnfg_bits {
> +	u32 value;
> +	struct {
> +		u32 obsolete:2;
> +		u32 bam_pipe_cnfg:1;
> +		u32 obsolete2:1;
> +		u32 reserved:7;
> +		u32 bam_full_pipe:1;
> +		u32 bam_no_ext_p_rst:1;
> +		u32 bam_ibc_disable:1;
> +		u32 bam_sb_clk_req:1;
> +		u32 bam_psm_csw_req:1;
> +		u32 bam_psm_p_res:1;
> +		u32 bam_au_p_res:1;
> +		u32 bam_si_p_res:1;
> +		u32 bam_wb_p_res:1;
> +		u32 bam_wb_blk_csw:1;
> +		u32 bam_wb_csw_ack_idl:1;
> +		u32 bam_wb_retr_svpnt:1;
> +		u32 bam_wb_dsc_avl_p_rst:1;
> +		u32 bam_reg_p_en:1;
> +		u32 bam_psm_p_hd_data:1;
> +		u32 bam_au_accumed:1;
> +		u32 bam_cd_enable:1;
> +		u32 reserved2:4;
> +	} bits;
> +};
> +
> +union bam_pipe_ctrl {
> +	u32 value;
> +	struct {
> +		u32 reserved:1;
> +		u32 p_en:1;
> +		u32 reserved2:1;
> +		u32 p_direction:1;
> +		u32 p_sys_strm:1;
> +		u32 p_sys_mode:1;
> +		u32 p_auto_eob:1;
> +		u32 p_auto_eob_sel:2;
> +		u32 p_prefetch_limit:2;
> +		u32 p_write_nwd:1;
> +		u32 reserved3:4;
> +		u32 p_lock_group:5;
> +		u32 reserved4:11;
> +	} bits;
> +};

Hmm.. I believe bitfields are not portable and rely on
implementation defined behavior. The compiler is free to put
these bits in whatever order it wants. For example, you're not
guaranteed that p_en comes after reserved. Please move away from
these unions and just do the bit shifting in the code.

> +
> +/* BAM_DESC_CNT_TRSHLD */
> +#define CNT_TRSHLD		0xffff
> +#define DEFAULT_CNT_THRSHLD	0x4
> +
> +/* BAM_IRQ_SRCS */
> +#define BAM_IRQ			(0x1 << 31)
> +#define P_IRQ			0x7fffffff
> +
> +/* BAM_IRQ_SRCS_MSK */
> +#define BAM_IRQ_MSK		(0x1 << 31)
> +#define P_IRQ_MSK		0x7fffffff
> +
> +/* BAM_IRQ_STTS */
> +#define BAM_TIMER_IRQ		(0x1 << 4)
> +#define BAM_EMPTY_IRQ		(0x1 << 3)
> +#define BAM_ERROR_IRQ		(0x1 << 2)
> +#define BAM_HRESP_ERR_IRQ	(0x1 << 1)
> +
> +/* BAM_IRQ_CLR */
> +#define BAM_TIMER_CLR		(0x1 << 4)
> +#define BAM_EMPTY_CLR		(0x1 << 3)
> +#define BAM_ERROR_CLR		(0x1 << 2)
> +#define BAM_HRESP_ERR_CLR	(0x1 << 1)
> +
> +/* BAM_IRQ_EN */
> +#define BAM_TIMER_EN		(0x1 << 4)
> +#define BAM_EMPTY_EN		(0x1 << 3)
> +#define BAM_ERROR_EN		(0x1 << 2)
> +#define BAM_HRESP_ERR_EN	(0x1 << 1)
> +
> +/* BAM_P_IRQ_EN */
> +#define P_PRCSD_DESC_EN		(0x1 << 0)
> +#define P_TIMER_EN		(0x1 << 1)
> +#define P_WAKE_EN		(0x1 << 2)
> +#define P_OUT_OF_DESC_EN	(0x1 << 3)
> +#define P_ERR_EN		(0x1 << 4)
> +#define P_TRNSFR_END_EN		(0x1 << 5)
> +#define P_DEFAULT_IRQS_EN	(P_PRCSD_DESC_EN | P_ERR_EN | P_TRNSFR_END_EN)
> +
> +/* BAM_P_SW_OFSTS */
> +#define P_SW_OFSTS_MASK		0xffff
> +
> +#define BAM_DESC_FIFO_SIZE	SZ_32K
> +#define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1)
> +#define BAM_MAX_DATA_SIZE	(SZ_32K - 8)
> +
> +struct bam_chan {
> +	struct dma_chan common;
> +	struct bam_device *device;
> +	u32 id;
> +	u32 ee;
> +	bool idle;

Is this used?

> +	struct bam_dma_slave_config bam_slave;	/* runtime configuration */
> +
> +	struct tasklet_struct tasklet;
> +	spinlock_t lock;		/* descriptor lock */
> +
> +	struct list_head pending;	/* desc pending queue */
> +	struct list_head active;	/* desc running queue */
> +
> +	struct bam_desc_hw *fifo_virt;
> +	dma_addr_t fifo_phys;
> +
> +	/* fifo markers */
> +	unsigned short head;		/* start of active descriptor entries */
> +	unsigned short tail;		/* end of active descriptor entries */
> +};
> +
> +static inline struct bam_chan *to_bam_chan(struct dma_chan *common)
> +{
> +	return container_of(common, struct bam_chan, common);
> +}
> +
> +struct bam_device {
> +	void __iomem *regs;
> +	struct device *dev;
> +	struct dma_device common;
> +	struct device_dma_parameters dma_parms;
> +	struct bam_chan *channels;
> +	u32 num_channels;
> +	u32 num_ees;
> +	unsigned long enabled_ees;
> +	u32 feature;

Is this used?

> +	int irq;
> +	struct clk *bamclk;
> +};
> +
> +static inline struct bam_device *to_bam_device(struct dma_device *common)
> +{
> +	return container_of(common, struct bam_device, common);
> +}
> +
> +#endif /* __MSM_BAM_DMA_PRIV_H__ */
Andy Gross Oct. 30, 2013, 8:31 p.m. UTC | #2
On Tue, Oct 29, 2013 at 10:56:03AM -0700, Stephen Boyd wrote:
> On 10/25, Andy Gross wrote:
> > diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
> > index f238cfd..a71b415 100644
> > --- a/drivers/dma/Kconfig
> > +++ b/drivers/dma/Kconfig
> > @@ -364,4 +364,13 @@ config DMATEST
> >  	  Simple DMA test client. Say N unless you're debugging a
> >  	  DMA Device driver.
> >  
> > +
> > +config MSM_BAM_DMA
> > +	tristate "MSM BAM DMA support"
> > +	depends on ARCH_MSM
> 
> It would be nice if we didn't have to rely on ARCH_MSM here so we
> get more build coverage.
 
I can remove that.  There is nothing that forces this depend option.

> > +	select DMA_ENGINE
> > +	---help---
> > +	  Enable support for the MSM BAM DMA controller.  This controller
> > +	  provides DMA capabilities for a variety of on-chip devices.
> > +
> >  endif
> > diff --git a/drivers/dma/msm_bam_dma.c b/drivers/dma/msm_bam_dma.c
> > new file mode 100644
> > index 0000000..d16bf94
> > --- /dev/null
> > +++ b/drivers/dma/msm_bam_dma.c
> > @@ -0,0 +1,840 @@
> > +/*
> > + * MSM BAM DMA engine driver
> > + *
> > + * Copyright (c) 2013, The Linux Foundation. All rights reserved.
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 and
> > + * only version 2 as published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + */
> > +
> > +#include <linux/kernel.h>
> > +#include <linux/io.h>
> > +#include <linux/init.h>
> > +#include <linux/slab.h>
> > +#include <linux/module.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/dma-mapping.h>
> > +#include <linux/dmaengine.h>
> > +#include <linux/interrupt.h>
> 
> interrupt.h is here twice
 
Will remove.

> > +#include <linux/scatterlist.h>
> > +#include <linux/device.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/of.h>
> > +#include <linux/of_address.h>
> > +#include <linux/of_irq.h>
> > +#include <linux/of_dma.h>
> > +#include <linux/clk.h>
> > +#include <linux/msm_bam_dma.h>
> > +
> > +#include "dmaengine.h"
> > +#include "msm_bam_dma_priv.h"
> 
> Why do we need this file? Can't we just put the #defines in this
> file?
 
There were enough definitions and structures to warrant another file.

> > +
> > +
> > +/*
> 
> There should be two '*'s here for kernel-doc.
 
Ok.  I'll conform.

> > + * bam_alloc_chan - Allocate channel resources for DMA channel.
> > + * @chan: specified channel
> > + *
> > + * This function allocates the FIFO descriptor memory and resets the channel
> > + */
> > +static int bam_alloc_chan(struct dma_chan *chan)
> > +{
> > +	struct bam_chan *bchan = to_bam_chan(chan);
> > +	struct bam_device *bdev = bchan->device;
> > +	u32 val;
> > +	union bam_pipe_ctrl pctrl;
> > +
> > +	/* check for channel activity */
> > +	pctrl.value = ioread32(bdev->regs + BAM_P_CTRL(bchan->id));
> 
> I recall io{read,write}*() being used for PCI and other ioport
> devices. If that's right then readl/writel would be more
> appropriate, and the *_relaxed variants would be even better if
> the correct memory barriers are also put in place.
 
I swear I ran across something about readl going legacy, which is why I went
with io{read,write}.  The relaxed variants would definitely be better.  I'll
switch over.

> > +	if (pctrl.bits.p_en) {
> > +		dev_err(bdev->dev, "channel already active\n");
> > +		return -EINVAL;
> > +	}
> > +
> > +	/* allocate FIFO descriptor space */
> > +	bchan->fifo_virt = (struct bam_desc_hw *)dma_alloc_coherent(bdev->dev,
> 
> There isn't any need to cast from void *.

Missed this one and a couple of others.  will fix.

> > +				BAM_DESC_FIFO_SIZE, &bchan->fifo_phys,
> > +				GFP_KERNEL);
> > +
> > +	if (!bchan->fifo_virt) {
> > +		dev_err(bdev->dev, "Failed to allocate descriptor fifo\n");
> > +		return -ENOMEM;
> > +	}
> > +
> > +	/* reset channel */
> > +	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
> > +	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
> > +
> > +	/* configure fifo address/size in bam channel registers */
> > +	iowrite32(bchan->fifo_phys, bdev->regs +
> > +			BAM_P_DESC_FIFO_ADDR(bchan->id));
> > +	iowrite32(BAM_DESC_FIFO_SIZE, bdev->regs +
> > +			BAM_P_FIFO_SIZES(bchan->id));
> > +
> > +	/* unmask and enable interrupts for defined EE, bam and error irqs */
> > +	iowrite32(BAM_IRQ_MSK, bdev->regs + BAM_IRQ_SRCS_EE(bchan->ee));
> > +
> > +	/* enable the per pipe interrupts, enable EOT and INT irqs */
> > +	iowrite32(P_DEFAULT_IRQS_EN, bdev->regs + BAM_P_IRQ_EN(bchan->id));
> > +
> > +	/* unmask the specific pipe and EE combo */
> > +	val = ioread32(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> > +	val |= 1 << bchan->id;
> > +	iowrite32(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> > +
> > +	/* set fixed direction and mode, then enable channel */
> > +	pctrl.value = 0;
> > +	pctrl.bits.p_direction =
> > +		(bchan->bam_slave.slave.direction == DMA_DEV_TO_MEM) ?
> > +			BAM_PIPE_PRODUCER : BAM_PIPE_CONSUMER;
> > +	pctrl.bits.p_sys_mode = BAM_PIPE_MODE_SYSTEM;
> > +	pctrl.bits.p_en = 1;
> > +
> > +	iowrite32(pctrl.value, bdev->regs + BAM_P_CTRL(bchan->id));
> > +
> > +	/* set desc threshold */
> 
> Is this a TODO?

I had moved this to the slave_config.  I'll remove the comment and make sure the
default is being set properly.

> > +	/* do bookkeeping for tracking used EEs, used during IRQ handling */
> > +	set_bit(bchan->ee, &bdev->enabled_ees);
> > +
> > +	bchan->head = 0;
> > +	bchan->tail = 0;
> > +
> > +	return 0;
> > +}
> > +
> > +/*
> > + * bam_free_chan - Frees dma resources associated with specific channel
> > + * @chan: specified channel
> > + *
> > + * Free the allocated fifo descriptor memory and channel resources
> > + *
> > + */
> > +static void bam_free_chan(struct dma_chan *chan)
> > +{
> > +	struct bam_chan *bchan = to_bam_chan(chan);
> > +	struct bam_device *bdev = bchan->device;
> > +	u32 val;
> > +
> > +	/* reset channel */
> > +	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
> > +	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
> > +
> > +	dma_free_coherent(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
> > +				bchan->fifo_phys);
> > +
> > +	/* mask irq for pipe/channel */
> > +	val = ioread32(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> > +	val &= ~(1 << bchan->id);
> > +	iowrite32(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> > +
> > +	/* disable irq */
> > +	iowrite32(0, bdev->regs + BAM_P_IRQ_EN(bchan->id));
> > +
> > +	clear_bit(bchan->ee, &bdev->enabled_ees);
> > +}
> > +
> > +/*
> > + * bam_slave_config - set slave configuration for channel
> > + * @chan: dma channel
> > + * @cfg: slave configuration
> > + *
> > + * Sets slave configuration for channel
> > + * Only allow setting direction once.  BAM channels are unidirectional
> > + * and the direction is set in hardware.
> > + *
> > + */
> > +static void bam_slave_config(struct bam_chan *bchan,
> > +		struct bam_dma_slave_config *bcfg)
> > +{
> > +	struct bam_device *bdev = bchan->device;
> > +
> > +	bchan->bam_slave.desc_threshold = bcfg->desc_threshold;
> > +
> > +	/* set desc threshold */
> > +	iowrite32(bcfg->desc_threshold, bdev->regs + BAM_DESC_CNT_TRSHLD);
> > +}
> > +
> > +/*
> > + * bam_start_dma - loads up descriptors and starts dma
> > + * @chan: dma channel
> > + *
> > + * Loads descriptors into descriptor fifo and starts dma controller
> > + *
> > + * NOTE: Must hold channel lock
> > +*/
> > +static void bam_start_dma(struct bam_chan *bchan)
> > +{
> > +	struct bam_device *bdev = bchan->device;
> > +	struct bam_async_desc *async_desc, *_adesc;
> > +	u32 curr_len, val;
> > +	u32 num_processed = 0;
> > +
> > +	if (list_empty(&bchan->pending))
> > +		return;
> > +
> > +	curr_len = (bchan->head <= bchan->tail) ?
> > +			bchan->tail - bchan->head :
> > +			MAX_DESCRIPTORS - bchan->head + bchan->tail;
> > +
> > +	list_for_each_entry_safe(async_desc, _adesc, &bchan->pending, node) {
> > +
> > +		/* bust out if we are out of room */
> > +		if (async_desc->num_desc + curr_len > MAX_DESCRIPTORS)
> > +			break;
> > +
> > +		/* copy descriptors into fifo */
> > +		if (bchan->tail + async_desc->num_desc > MAX_DESCRIPTORS) {
> > +			u32 partial = MAX_DESCRIPTORS - bchan->tail;
> > +
> > +			memcpy(&bchan->fifo_virt[bchan->tail], async_desc->desc,
> > +				partial * sizeof(struct bam_desc_hw));
> > +			memcpy(bchan->fifo_virt, &async_desc->desc[partial],
> > +				(async_desc->num_desc - partial) *
> > +					sizeof(struct bam_desc_hw));
> > +		} else {
> > +			memcpy(&bchan->fifo_virt[bchan->tail], async_desc->desc,
> > +				async_desc->num_desc *
> > +				sizeof(struct bam_desc_hw));
> > +		}
> > +
> > +		num_processed++;
> > +		bchan->tail += async_desc->num_desc;
> > +		bchan->tail %= MAX_DESCRIPTORS;
> > +		curr_len += async_desc->num_desc;
> 
> I wonder if you could use the circ_buf API here.
 
I'll look into that.  I did a onceover and it looked promising.

> > +
> > +		list_move_tail(&async_desc->node, &bchan->active);
> > +	}
> > +
> > +	/* bail if we didn't queue anything to the active queue */
> > +	if (!num_processed)
> > +		return;
> > +
> > +	async_desc = list_first_entry(&bchan->active, struct bam_async_desc,
> > +			node);
> > +
> > +	val = ioread32(bdev->regs + BAM_P_SW_OFSTS(bchan->id));
> > +	val &= P_SW_OFSTS_MASK;
> > +
> > +	/* kick off dma by forcing a write event to the pipe */
> > +	iowrite32((bchan->tail * sizeof(struct bam_desc_hw)),
> > +			bdev->regs + BAM_P_EVNT_REG(bchan->id));
> > +}
> > +
> > +/*
> > + * bam_tx_submit - Adds transaction to channel pending queue
> > + * @tx: transaction to queue
> > + *
> > + * Adds dma transaction to pending queue for channel
> > + *
> > +*/
> > +static dma_cookie_t bam_tx_submit(struct dma_async_tx_descriptor *tx)
> > +{
> > +	struct bam_chan *bchan = to_bam_chan(tx->chan);
> > +	struct bam_async_desc *desc = to_bam_async_desc(tx);
> > +	dma_cookie_t cookie;
> > +
> > +	spin_lock_bh(&bchan->lock);
> > +
> > +	cookie = dma_cookie_assign(tx);
> > +	list_add_tail(&desc->node, &bchan->pending);
> > +
> > +	spin_unlock_bh(&bchan->lock);
> > +
> > +	return cookie;
> > +}
> > +
> > +/*
> > + * bam_prep_slave_sg - Prep slave sg transaction
> > + *
> > + * @chan: dma channel
> > + * @sgl: scatter gather list
> > + * @sg_len: length of sg
> > + * @direction: DMA transfer direction
> > + * @flags: DMA flags
> > + * @context: transfer context (unused)
> > + */
> > +static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan,
> > +	struct scatterlist *sgl, unsigned int sg_len,
> > +	enum dma_transfer_direction direction, unsigned long flags,
> > +	void *context)
> > +{
> > +	struct bam_chan *bchan = to_bam_chan(chan);
> > +	struct bam_device *bdev = bchan->device;
> > +	struct bam_async_desc *async_desc = NULL;
> 
> Useless assignment? Just return NULL instead of the next 3 goto's and
> this can be avoided.
> 
> > +	struct scatterlist *sg;
> > +	u32 i;
> > +	struct bam_desc_hw *desc;
> > +
> > +
> > +	if (!is_slave_direction(direction)) {
> > +		dev_err(bdev->dev, "invalid dma direction\n");
> > +		goto err_out;
> > +	}
> 
> I'm surprised the core framework doesn't do this.

The is_slave_direction() was added as a helper.  And yeah I'd have expected it
as well.

> > +
> > +	/* direction has to match pipe configuration from the slave config */
> > +	if (direction != bchan->bam_slave.slave.direction) {
> > +		dev_err(bdev->dev,
> > +				"trans does not match channel configuration\n");
> 
> s/trans/transfer/ ?
 
Will reword to stay under 80 columns.  That's why I abbreviated

> > +		goto err_out;
> > +	}
> > +
> > +	/* make sure number of descriptors will fit within the fifo */
> > +	if (sg_len > MAX_DESCRIPTORS) {
> > +		dev_err(bdev->dev, "not enough fifo descriptor space\n");
> > +		goto err_out;
> > +	}
> > +
> > +	/* allocate enough room to accomodate the number of entries */
> > +	async_desc = kzalloc(sizeof(*async_desc) +
> > +			(sg_len * sizeof(struct bam_desc_hw)), GFP_KERNEL);
> > +
> > +	if (!async_desc) {
> > +		dev_err(bdev->dev, "failed to allocate async descriptor\n");
> > +		goto err_out;
> > +	}
> > +
> > +	async_desc->num_desc = sg_len;
> > +	async_desc->dir = (direction == DMA_DEV_TO_MEM) ? BAM_PIPE_PRODUCER :
> > +				BAM_PIPE_CONSUMER;
> > +
> > +	/* fill in descriptors, align hw descriptor to 8 bytes */
> > +	desc = async_desc->desc;
> > +	for_each_sg(sgl, sg, sg_len, i) {
> > +		if (sg_dma_len(sg) > BAM_MAX_DATA_SIZE) {
> > +			dev_err(bdev->dev, "segment exceeds max size\n");
> > +			goto err_out;
> > +		}
> > +
> > +		desc->addr = sg_dma_address(sg);
> > +		desc->size = sg_dma_len(sg);
> > +		desc++;
> > +	}
> > +
> > +	/* set EOT flag on last descriptor, we want IRQ on completion */
> > +	async_desc->desc[async_desc->num_desc-1].flags |= DESC_FLAG_EOT;
> 
> Please add space between num_desc, dash, and 1.
 
will fix

> > +
> > +	dma_async_tx_descriptor_init(&async_desc->txd, chan);
> > +	async_desc->txd.tx_submit = bam_tx_submit;
> > +
> > +	return &async_desc->txd;
> > +
> > +err_out:
> > +	kfree(async_desc);
> > +	return NULL;
> > +}
> > +
> > +/*
> > + * bam_dma_terminate_all - terminate all transactions
> > + * @chan: dma channel
> > + *
> > + * Idles channel and dequeues and frees all transactions
> > + * No callbacks are done
> > + *
> > +*/
> > +static void bam_dma_terminate_all(struct dma_chan *chan)
> > +{
> > +	struct bam_chan *bchan = to_bam_chan(chan);
> > +	struct bam_device *bdev = bchan->device;
> > +	LIST_HEAD(desc_cleanup);
> > +	struct bam_async_desc *desc, *_desc;
> > +
> > +	spin_lock_bh(&bchan->lock);
> > +
> > +	/* reset channel */
> > +	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
> > +	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
> > +
> > +	/* grab all the descriptors and free them */
> > +	list_splice_tail_init(&bchan->pending, &desc_cleanup);
> > +	list_splice_tail_init(&bchan->active, &desc_cleanup);
> > +
> > +	list_for_each_entry_safe(desc, _desc, &desc_cleanup, node)
> > +		kfree(desc);
> > +
> > +	spin_unlock_bh(&bchan->lock);
> > +}
> > +
> > +/*
> > + * bam_control - DMA device control
> > + * @chan: dma channel
> > + * @cmd: control cmd
> > + * @arg: cmd argument
> > + *
> > + * Perform DMA control command
> > + *
> > +*/
> > +static int bam_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
> > +	unsigned long arg)
> > +{
> > +	struct bam_chan *bchan = to_bam_chan(chan);
> > +	struct bam_device *bdev = bchan->device;
> > +	struct bam_dma_slave_config *bconfig;
> > +	int ret = 0;
> > +
> > +	switch (cmd) {
> > +	case DMA_PAUSE:
> > +		spin_lock_bh(&bchan->lock);
> > +		iowrite32(1, bdev->regs + BAM_P_HALT(bchan->id));
> > +		spin_unlock_bh(&bchan->lock);
> > +		break;
> > +	case DMA_RESUME:
> > +		spin_lock_bh(&bchan->lock);
> > +		iowrite32(0, bdev->regs + BAM_P_HALT(bchan->id));
> > +		spin_unlock_bh(&bchan->lock);
> > +		break;
> > +	case DMA_TERMINATE_ALL:
> > +		bam_dma_terminate_all(chan);
> > +		break;
> > +	case DMA_SLAVE_CONFIG:
> > +		bconfig = (struct bam_dma_slave_config *)arg;
> > +		bam_slave_config(bchan, bconfig);
> > +		break;
> > +	default:
> > +		ret = -EIO;
> > +		break;
> > +	}
> > +
> > +	return ret;
> > +}
> > +
> > +/*
> > + * process_irqs_per_ee - processes the interrupts for a specific ee
> > + * @bam: bam controller
> > + * @ee: execution environment
> > + *
> > + * This function processes the interrupts for a given execution environment
> > + *
> > + */
> > +static u32 process_irqs_per_ee(struct bam_device *bdev,
> > +	u32 ee)
> > +{
> > +	u32 i, srcs, stts, pipe_stts;
> > +	u32 clr_mask = 0;
> > +
> > +
> > +	srcs = ioread32(bdev->regs + BAM_IRQ_SRCS_EE(ee));
> > +
> > +	/* check for general bam error */
> > +	if (srcs & BAM_IRQ) {
> > +		stts = ioread32(bdev->regs + BAM_IRQ_STTS);
> > +		clr_mask |= stts;
> > +	}
> > +
> > +	/* check pipes / channels */
> > +	if (srcs & P_IRQ) {
> > +
> > +		for (i = 0; i < bdev->num_channels; i++) {
> > +			if (srcs & (1 << i)) {
> > +				/* clear pipe irq */
> > +				pipe_stts = ioread32(bdev->regs +
> > +					BAM_P_IRQ_STTS(i));
> > +
> > +				iowrite32(pipe_stts, bdev->regs +
> > +					BAM_P_IRQ_CLR(i));
> > +
> > +				/* schedule channel work */
> > +				tasklet_schedule(&bdev->channels[i].tasklet);
> 
> Maybe this little hunk inside the for loop deserves another
> function because we're pretty deeply nested here. Or invert the
> logic so that if (!(srcs & P_IRQ)) returns clr_mask and then this
> can be less indented.

Right I thought about the function originally.  I'll see about doing that or the
inverse logic solution.

> > +			}
> > +		}
> > +	}
> > +
> > +	return clr_mask;
> > +}
> > +
> > +/*
> > + * bam_dma_irq - irq handler for bam controller
> > + * @irq: IRQ of interrupt
> > + * @data: callback data
> > + *
> > + * IRQ handler for the bam controller
> > + */
> > +static irqreturn_t bam_dma_irq(int irq, void *data)
> > +{
> > +	struct bam_device *bdev = (struct bam_device *)data;
> 
> Unnecessary cast.
> 
> > +	u32 clr_mask = 0;
> > +	u32 i;
> > +
> > +
> > +	for (i = 0; i < bdev->num_ees; i++) {
> > +		if (test_bit(i, &bdev->enabled_ees))
> > +			clr_mask |= process_irqs_per_ee(bdev, i);
> > +	}
> 
> These braces aren't really necessary.
> 
> > +
> > +	iowrite32(clr_mask, bdev->regs + BAM_IRQ_CLR);
> > +
> > +	return IRQ_HANDLED;
> > +}
> > +
> > +/*
> > + * bam_tx_status - returns status of transaction
> > + * @chan: dma channel
> > + * @cookie: transaction cookie
> > + * @txstate: DMA transaction state
> > + *
> > + * Return status of dma transaction
> > + */
> > +static enum dma_status bam_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
> > +		struct dma_tx_state *txstate)
> > +{
> > +	return dma_cookie_status(chan, cookie, txstate);
> > +}
> 
> Do we actually need this function? Can't we just assign
> dma_cookie_status to device_tx_status below?

Maybe.  That would simplify things.

> > +
> > +/*
> > + * dma_tasklet - DMA IRQ tasklet
> > + * @data: tasklet argument (bam controller structure)
> > + *
> > + * Sets up next DMA operation and then processes all completed transactions
> > + */
> > +static void dma_tasklet(unsigned long data)
> > +{
> > +	struct bam_chan *bchan = (struct bam_chan *)data;
> > +	struct bam_async_desc *desc, *_desc;
> > +	LIST_HEAD(desc_cleanup);
> > +	u32 fifo_length;
> > +
> > +
> > +	spin_lock_bh(&bchan->lock);
> > +
> > +	if (list_empty(&bchan->active))
> > +		goto out;
> > +
> > +	fifo_length = (bchan->head <= bchan->tail) ?
> > +		bchan->tail - bchan->head :
> > +		MAX_DESCRIPTORS - bchan->head + bchan->tail;
> 
> This is fairly complicated. Can't we just write it like this?
> 
> 	if (bchan->head <= bchan->tail)
> 		fifo_length = bchan->tail - bchan->head;
> 	else
> 		fifo_length = MAX_DESCRIPTORS - bchan->head + bchan->tail;

Yeah, or if I switch to circ_buf it might simplify as well.

> > +
> > +	/* only process those which are currently done */
> > +	list_for_each_entry_safe(desc, _desc, &bchan->active, node) {
> > +		if (desc->num_desc > fifo_length)
> > +			break;
> > +
> > +		dma_cookie_complete(&desc->txd);
> > +
> > +		list_move_tail(&desc->node, &desc_cleanup);
> > +		fifo_length -= desc->num_desc;
> > +		bchan->head += desc->num_desc;
> > +		bchan->head %= MAX_DESCRIPTORS;
> > +	}
> > +
> > +out:
> > +	/* kick off processing of any queued descriptors */
> > +	bam_start_dma(bchan);
> > +
> > +	spin_unlock_bh(&bchan->lock);
> > +
> > +	/* process completed descriptors */
> > +	list_for_each_entry_safe(desc, _desc, &desc_cleanup, node) {
> > +		if (desc->txd.callback)
> > +			desc->txd.callback(desc->txd.callback_param);
> > +
> > +		kfree(desc);
> > +	}
> > +}
> > +
> > +/*
> > + * bam_issue_pending - starts pending transactions
> > + * @chan: dma channel
> > + *
> > + * Calls tasklet directly which in turn starts any pending transactions
> > + */
> > +static void bam_issue_pending(struct dma_chan *chan)
> > +{
> > +	dma_tasklet((unsigned long)chan);
> > +}
> > +
> > +struct bam_filter_args {
> > +	struct dma_device *dev;
> > +	u32 id;
> > +	u32 ee;
> > +	u32 dir;
> > +};
> > +
> > +static bool bam_dma_filter(struct dma_chan *chan, void *data)
> > +{
> > +	struct bam_filter_args *args = data;
> > +	struct bam_chan *bchan = to_bam_chan(chan);
> > +
> > +	if (args->dev == chan->device &&
> > +		args->id == bchan->id) {
> > +
> > +		/* we found the channel, so lets set the EE and dir */
> > +		bchan->ee = args->ee;
> > +		bchan->bam_slave.slave.direction = args->dir ?
> > +				DMA_DEV_TO_MEM : DMA_MEM_TO_DEV;
> > +		return true;
> > +	}
> > +
> > +	return false;
> > +}
> > +
> > +static struct dma_chan *bam_dma_xlate(struct of_phandle_args *dma_spec,
> > +		struct of_dma *of)
> > +{
> > +	struct bam_filter_args args;
> > +	dma_cap_mask_t cap;
> > +
> > +	if (dma_spec->args_count != 3)
> > +		return NULL;
> > +
> > +	args.dev = of->of_dma_data;
> > +	args.id = dma_spec->args[0];
> > +	args.ee = dma_spec->args[1];
> > +	args.dir = dma_spec->args[2];
> > +
> > +	dma_cap_zero(cap);
> > +	dma_cap_set(DMA_SLAVE, cap);
> > +
> > +	return dma_request_channel(cap, bam_dma_filter, &args);
> > +}
> > +
> > +/*
> > + * bam_init
> > + * @bdev: bam device
> > + *
> > + * Initialization helper for global bam registers
> > + */
> > +static int bam_init(struct bam_device *bdev)
> > +{
> > +	union bam_num_pipes num_pipes;
> > +	union bam_ctrl ctrl;
> > +	union bam_cnfg_bits cnfg_bits;
> > +	union bam_revision revision;
> > +
> > +	/* read versioning information */
> > +	revision.value = ioread32(bdev->regs + BAM_REVISION);
> > +	bdev->num_ees = revision.bits.num_ees;
> > +
> > +	num_pipes.value = ioread32(bdev->regs + BAM_NUM_PIPES);
> > +	bdev->num_channels = num_pipes.bits.bam_num_pipes;
> > +
> > +	/* s/w reset bam */
> > +	/* after reset all pipes are disabled and idle */
> > +	ctrl.value = ioread32(bdev->regs + BAM_CTRL);
> > +	ctrl.bits.bam_sw_rst = 1;
> > +	iowrite32(ctrl.value, bdev->regs + BAM_CTRL);
> > +	ctrl.bits.bam_sw_rst = 0;
> > +	iowrite32(ctrl.value, bdev->regs + BAM_CTRL);
> > +
> > +	/* enable bam */
> > +	ctrl.bits.bam_en = 1;
> > +	iowrite32(ctrl.value, bdev->regs + BAM_CTRL);
> > +
> > +	/* set descriptor threshhold, start with 4 bytes */
> > +	iowrite32(DEFAULT_CNT_THRSHLD, bdev->regs + BAM_DESC_CNT_TRSHLD);
> > +
> > +	/* set config bits for h/w workarounds */
> > +	/* Enable all workarounds except BAM_FULL_PIPE */
> > +	cnfg_bits.value = 0xffffffff;
> > +	cnfg_bits.bits.obsolete = 0;
> > +	cnfg_bits.bits.obsolete2 = 0;
> > +	cnfg_bits.bits.reserved = 0;
> > +	cnfg_bits.bits.reserved2 = 0;
> > +	cnfg_bits.bits.bam_full_pipe = 0;
> > +	iowrite32(cnfg_bits.value, bdev->regs + BAM_CNFG_BITS);
> > +
> > +	/* enable irqs for errors */
> > +	iowrite32(BAM_ERROR_EN | BAM_HRESP_ERR_EN, bdev->regs + BAM_IRQ_EN);
> > +	return 0;
> > +}
> > +
> > +static void bam_channel_init(struct bam_device *bdev, struct bam_chan *bchan,
> > +	u32 index)
> > +{
> > +	bchan->id = index;
> > +	bchan->common.device = &bdev->common;
> > +	bchan->device = bdev;
> > +	spin_lock_init(&bchan->lock);
> > +
> > +	INIT_LIST_HEAD(&bchan->pending);
> > +	INIT_LIST_HEAD(&bchan->active);
> > +
> > +	dma_cookie_init(&bchan->common);
> > +	list_add_tail(&bchan->common.device_node,
> > +		&bdev->common.channels);
> > +
> > +	tasklet_init(&bchan->tasklet, dma_tasklet, (unsigned long)bchan);
> > +
> > +	/* reset channel - just to be sure */
> > +	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
> > +	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
> > +}
> > +
> > +static int bam_dma_probe(struct platform_device *pdev)
> > +{
> > +	struct bam_device *bdev;
> > +	int err, i;
> > +
> > +	bdev = kzalloc(sizeof(*bdev), GFP_KERNEL);
> 
> Please use devm_kzalloc() here to simplify error paths.
> 

agreed

> > +	if (!bdev) {
> > +		dev_err(&pdev->dev, "insufficient memory for private data\n");
> 
> kmalloc calls already print errors when they fail, so this can be
> removed.
 
has this always been the case?

> > +		err = -ENOMEM;
> > +		goto err_no_bdev;
> 
> Just return the error here.
> 

Since it's the first check, I'm ok with that.  Otherwise i'd prefer to stick
with a unified return

> > +	}
> > +
> > +	bdev->dev = &pdev->dev;
> > +	dev_set_drvdata(bdev->dev, bdev);
> > +
> > +	bdev->regs = of_iomap(pdev->dev.of_node, 0);
> > +	if (!bdev->regs) {
> > +		dev_err(bdev->dev, "unable to ioremap base\n");
> > +		err = -ENOMEM;
> > +		goto err_free_bamdev;
> > +	}
> > +
> > +	bdev->irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
> > +	if (bdev->irq == NO_IRQ) {
> > +		dev_err(bdev->dev, "unable to map irq\n");
> > +		err = -EINVAL;
> > +		goto err_unmap_mem;
> > +	}
> 
> Please use platform_* APIs here, this is a platform driver after
> all. Notably, use platform_get_irq() and platform_get_resource()
> followed by devm_ioremap_resource().
 
agreed.

> > +
> > +	bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk");
> > +	if (IS_ERR(bdev->bamclk)) {
> > +		err = PTR_ERR(bdev->bamclk);
> > +		goto err_free_irq;
> > +	}
> > +
> > +	err = clk_prepare_enable(bdev->bamclk);
> > +	if (err) {
> > +		dev_err(bdev->dev, "failed to prepare/enable clock");
> > +		goto err_free_irq;
> > +	}
> > +
> > +	err = request_irq(bdev->irq, &bam_dma_irq, IRQF_TRIGGER_HIGH, "bam_dma",
> > +				bdev);
> 
> Can be devm_request_irq(). Shouldn't this be done much later in
> the probe? It looks like bdev isn't fully setup yet.
 
All the IRQs are disabled and masked, however in a module situation this might
not be the case.  It probably should be moved later.

> > +	if (err) {
> > +		dev_err(bdev->dev, "error requesting irq\n");
> > +		err = -EINVAL;
> > +		goto err_disable_clk;
> > +	}
> > +
> > +	if (bam_init(bdev)) {
> > +		dev_err(bdev->dev, "cannot initialize bam device\n");
> > +		err = -EINVAL;
> > +		goto err_disable_clk;
> > +	}
> > +
> > +	bdev->channels = kzalloc(sizeof(*bdev->channels) * bdev->num_channels,
> > +				GFP_KERNEL);
> > +
> 
> We're going to have devm_kcalloc() in 3.13 so you should be able
> to use that here.
> 
> > +	if (!bdev->channels) {
> > +		dev_err(bdev->dev, "unable to allocate channels\n");
> > +		err = -ENOMEM;
> > +		goto err_disable_clk;
> > +	}
> > +
> > +	/* allocate and initialize channels */
> > +	INIT_LIST_HEAD(&bdev->common.channels);
> > +
> > +	for (i = 0; i < bdev->num_channels; i++)
> > +		bam_channel_init(bdev, &bdev->channels[i], i);
> > +
> > +	/* set max dma segment size */
> > +	bdev->common.dev = bdev->dev;
> > +	bdev->common.dev->dma_parms = &bdev->dma_parms;
> > +	if (dma_set_max_seg_size(bdev->common.dev, BAM_MAX_DATA_SIZE)) {
> > +		dev_err(bdev->dev, "cannot set maximum segment size\n");
> > +		goto err_disable_clk;
> > +	}
> > +
> > +	/* set capabilities */
> > +	dma_cap_zero(bdev->common.cap_mask);
> > +	dma_cap_set(DMA_SLAVE, bdev->common.cap_mask);
> > +
> > +	/* initialize dmaengine apis */
> > +	bdev->common.device_alloc_chan_resources = bam_alloc_chan;
> > +	bdev->common.device_free_chan_resources = bam_free_chan;
> > +	bdev->common.device_prep_slave_sg = bam_prep_slave_sg;
> > +	bdev->common.device_control = bam_control;
> > +	bdev->common.device_issue_pending = bam_issue_pending;
> > +	bdev->common.device_tx_status = bam_tx_status;
> > +	bdev->common.dev = bdev->dev;
> > +
> > +	dma_async_device_register(&bdev->common);
> 
> This can fail. Please check error codes.
 
agreed

> > +
> > +	if (pdev->dev.of_node) {
> > +		err = of_dma_controller_register(pdev->dev.of_node,
> > +				bam_dma_xlate, &bdev->common);
> > +
> > +		if (err) {
> > +			dev_err(bdev->dev, "failed to register of_dma\n");
> > +			goto err_unregister_dma;
> > +		}
> > +	}
> > +
> > +	return 0;
> > +
> > +err_unregister_dma:
> > +	dma_async_device_unregister(&bdev->common);
> > +err_free_irq:
> > +	free_irq(bdev->irq, bdev);
> > +err_disable_clk:
> > +	clk_disable_unprepare(bdev->bamclk);
> > +err_unmap_mem:
> > +	iounmap(bdev->regs);
> > +err_free_bamdev:
> > +	if (bdev)
> > +		kfree(bdev->channels);
> > +	kfree(bdev);
> > +err_no_bdev:
> > +	return err;
> > +}
> > +
> > +static int bam_dma_remove(struct platform_device *pdev)
> > +{
> > +	struct bam_device *bdev;
> > +
> > +	bdev = dev_get_drvdata(&pdev->dev);
> > +	dev_set_drvdata(&pdev->dev, NULL);
> 
> This is unnecessary.
> 
> > +
> > +	dma_async_device_unregister(&bdev->common);
> > +
> > +	if (bdev) {
> 
> Ouch. We just dereferenced bdev one line before so it seems that
> this check is unnecessary.

True.  I believe at one point I was using the remove function call directly from
within the probe during failure.  I can rework this.

> > +		free_irq(bdev->irq, bdev);
> > +		clk_disable_unprepare(bdev->bamclk);
> > +		iounmap(bdev->regs);
> > +		kfree(bdev->channels);
> > +	}
> > +
> > +	kfree(bdev);
> > +	return 0;
> > +}
> [...]
> > +
> > +static int __init bam_dma_init(void)
> > +{
> > +	return platform_driver_register(&bam_dma_driver);
> > +}
> > +
> > +static void __exit bam_dma_exit(void)
> > +{
> > +	return platform_driver_unregister(&bam_dma_driver);
> > +}
> > +
> > +arch_initcall(bam_dma_init);
> > +module_exit(bam_dma_exit);
> 
> module_platform_driver()? Or is there some probe deferral problem
> that isn't resolved?
> 

Good point. If this becomes a problem, the peripheral drivers using the DMA can
implement probe deferral.

> > +
> > +MODULE_AUTHOR("Andy Gross <agross@codeaurora.org>");
> > +MODULE_DESCRIPTION("MSM BAM DMA engine driver");
> > +MODULE_LICENSE("GPL v2");
> > diff --git a/drivers/dma/msm_bam_dma_priv.h b/drivers/dma/msm_bam_dma_priv.h
> > new file mode 100644
> > index 0000000..4d6a10c7
> > --- /dev/null
> > +++ b/drivers/dma/msm_bam_dma_priv.h
> > @@ -0,0 +1,286 @@
> > +/*
> > + * Copyright (c) 2013, The Linux Foundation. All rights reserved.
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 and
> > + * only version 2 as published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + */
> > +#ifndef __MSM_BAM_DMA_PRIV_H__
> > +#define __MSM_BAM_DMA_PRIV_H__
> > +
> > +#include <linux/dmaengine.h>
> > +
> > +enum bam_channel_mode {
> > +	BAM_PIPE_MODE_BAM2BAM = 0,	/* BAM to BAM aka device to device */
> > +	BAM_PIPE_MODE_SYSTEM,		/* BAM to/from System Memory */
> > +};
> > +
> > +enum bam_channel_dir {
> > +	BAM_PIPE_CONSUMER = 0,	/* channel reads from data-fifo or memory */
> > +	BAM_PIPE_PRODUCER,	/* channel writes to data-fifo or memory */
> > +};
> > +
> > +struct bam_desc_hw {
> > +	u32 addr;		/* Buffer physical address */
> > +	u32 size:16;		/* Buffer size in bytes */
> > +	u32 flags:16;
> > +};
> 
> Is this an in memory structure that the hardware reads? It should
> probably use the correct types (i.e. u16 instead of bit fields)
> and then be marked as __packed__ so that compile doesn't mess
> up the alignment.
 
Will fix this.  There isn't any need for the bitfields.

> > +
> > +#define DESC_FLAG_INT	(1<<15)
> > +#define DESC_FLAG_EOT	(1<<14)
> > +#define DESC_FLAG_EOB	(1<<13)
> 
> Space around << here please. Or use the BIT() macro.
> 

BIT() makes it cleaner.  I'll use that instead

> > +
> > +struct bam_async_desc {
> > +	struct list_head node;
> > +	struct dma_async_tx_descriptor txd;
> > +	u32 num_desc;
> > +	enum bam_channel_dir dir;
> > +	u32 fifo_pos;
> > +	struct bam_desc_hw desc[0];
> > +};
> > +
> > +static inline struct bam_async_desc *to_bam_async_desc(
> > +		struct dma_async_tx_descriptor *txd)
> > +{
> > +	return container_of(txd, struct bam_async_desc, txd);
> > +}
> > +
> > +
> > +#define BAM_CTRL			0x0000
> > +#define BAM_REVISION			0x0004
> > +#define BAM_SW_REVISION			0x0080
> > +#define BAM_NUM_PIPES			0x003C
> > +#define BAM_TIMER			0x0040
> > +#define BAM_TIMER_CTRL			0x0044
> > +#define BAM_DESC_CNT_TRSHLD		0x0008
> > +#define BAM_IRQ_SRCS			0x000C
> > +#define BAM_IRQ_SRCS_MSK		0x0010
> > +#define BAM_IRQ_SRCS_UNMASKED		0x0030
> > +#define BAM_IRQ_STTS			0x0014
> > +#define BAM_IRQ_CLR			0x0018
> > +#define BAM_IRQ_EN			0x001C
> > +#define BAM_CNFG_BITS			0x007C
> > +#define BAM_IRQ_SRCS_EE(x)		(0x0800 + ((x) * 0x80))
> > +#define BAM_IRQ_SRCS_MSK_EE(x)		(0x0804 + ((x) * 0x80))
> > +#define BAM_P_CTRL(x)			(0x1000 + ((x) * 0x1000))
> > +#define BAM_P_RST(x)			(0x1004 + ((x) * 0x1000))
> > +#define BAM_P_HALT(x)			(0x1008 + ((x) * 0x1000))
> > +#define BAM_P_IRQ_STTS(x)		(0x1010 + ((x) * 0x1000))
> > +#define BAM_P_IRQ_CLR(x)		(0x1014 + ((x) * 0x1000))
> > +#define BAM_P_IRQ_EN(x)			(0x1018 + ((x) * 0x1000))
> > +#define BAM_P_EVNT_DEST_ADDR(x)		(0x182C + ((x) * 0x1000))
> > +#define BAM_P_EVNT_REG(x)		(0x1818 + ((x) * 0x1000))
> > +#define BAM_P_SW_OFSTS(x)		(0x1800 + ((x) * 0x1000))
> > +#define BAM_P_DATA_FIFO_ADDR(x)		(0x1824 + ((x) * 0x1000))
> > +#define BAM_P_DESC_FIFO_ADDR(x)		(0x181C + ((x) * 0x1000))
> > +#define BAM_P_EVNT_TRSHLD(x)		(0x1828 + ((x) * 0x1000))
> > +#define BAM_P_FIFO_SIZES(x)		(0x1820 + ((x) * 0x1000))
> 
> If you have the columns it might be nice to s/x/pipe/ so that it's
> clear what 'x' is.

Will do

> > +
> > +union bam_ctrl {
> > +	u32 value;
> > +	struct {
> > +		u32 bam_sw_rst:1;
> > +		u32 bam_en:1;
> > +		u32 reserved3:2;
> > +		u32 bam_en_accum:1;
> > +		u32 testbus_sel:7;
> > +		u32 reserved2:1;
> > +		u32 bam_desc_cache_sel:2;
> > +		u32 bam_cached_desc_store:1;
> > +		u32 ibc_disable:1;
> > +		u32 reserved1:15;
> > +	} bits;
> > +};
> > +
> > +union bam_revision {
> > +	u32 value;
> > +	struct {
> > +		u32 revision:8;
> > +		u32 num_ees:4;
> > +		u32 reserved1:1;
> > +		u32 ce_buffer_size:1;
> > +		u32 axi_active:1;
> > +		u32 use_vmidmt:1;
> > +		u32 secured:1;
> > +		u32 bam_has_no_bypass:1;
> > +		u32 high_frequency_bam:1;
> > +		u32 inactiv_tmrs_exst:1;
> > +		u32 num_inactiv_tmrs:1;
> > +		u32 desc_cache_depth:2;
> > +		u32 cmd_desc_en:1;
> > +		u32 inactiv_tmr_base:8;
> > +	} bits;
> > +};
> > +
> > +union bam_sw_revision {
> > +	u32 value;
> > +	struct {
> > +		u32 step:16;
> > +		u32 minor:12;
> > +		u32 major:4;
> > +	} bits;
> > +};
> > +
> > +union bam_num_pipes {
> > +	u32 value;
> > +	struct {
> > +		u32 bam_num_pipes:8;
> > +		u32 reserved:8;
> > +		u32 periph_non_pipe_grp:8;
> > +		u32 bam_non_pipe_grp:8;
> > +	} bits;
> > +};
> > +
> > +union bam_irq_srcs_msk {
> > +	u32 value;
> > +	struct {
> > +		u32 p_irq_msk:31;
> > +		u32 bam_irq_msk:1;
> > +	} bits;
> > +};
> > +
> > +union bam_cnfg_bits {
> > +	u32 value;
> > +	struct {
> > +		u32 obsolete:2;
> > +		u32 bam_pipe_cnfg:1;
> > +		u32 obsolete2:1;
> > +		u32 reserved:7;
> > +		u32 bam_full_pipe:1;
> > +		u32 bam_no_ext_p_rst:1;
> > +		u32 bam_ibc_disable:1;
> > +		u32 bam_sb_clk_req:1;
> > +		u32 bam_psm_csw_req:1;
> > +		u32 bam_psm_p_res:1;
> > +		u32 bam_au_p_res:1;
> > +		u32 bam_si_p_res:1;
> > +		u32 bam_wb_p_res:1;
> > +		u32 bam_wb_blk_csw:1;
> > +		u32 bam_wb_csw_ack_idl:1;
> > +		u32 bam_wb_retr_svpnt:1;
> > +		u32 bam_wb_dsc_avl_p_rst:1;
> > +		u32 bam_reg_p_en:1;
> > +		u32 bam_psm_p_hd_data:1;
> > +		u32 bam_au_accumed:1;
> > +		u32 bam_cd_enable:1;
> > +		u32 reserved2:4;
> > +	} bits;
> > +};
> > +
> > +union bam_pipe_ctrl {
> > +	u32 value;
> > +	struct {
> > +		u32 reserved:1;
> > +		u32 p_en:1;
> > +		u32 reserved2:1;
> > +		u32 p_direction:1;
> > +		u32 p_sys_strm:1;
> > +		u32 p_sys_mode:1;
> > +		u32 p_auto_eob:1;
> > +		u32 p_auto_eob_sel:2;
> > +		u32 p_prefetch_limit:2;
> > +		u32 p_write_nwd:1;
> > +		u32 reserved3:4;
> > +		u32 p_lock_group:5;
> > +		u32 reserved4:11;
> > +	} bits;
> > +};
> 
> Hmm.. I believe bitfields are not portable and rely on
> implementation defined behavior. The compiler is free to put
> these bits in whatever order it wants. For example, you're not
> guaranteed that p_en comes after reserved. Please move away from
> these unions and just do the bit shifting in the code.
 
Will do.  I can just define bits/masks and do it that way.

> > +
> > +/* BAM_DESC_CNT_TRSHLD */
> > +#define CNT_TRSHLD		0xffff
> > +#define DEFAULT_CNT_THRSHLD	0x4
> > +
> > +/* BAM_IRQ_SRCS */
> > +#define BAM_IRQ			(0x1 << 31)
> > +#define P_IRQ			0x7fffffff
> > +
> > +/* BAM_IRQ_SRCS_MSK */
> > +#define BAM_IRQ_MSK		(0x1 << 31)
> > +#define P_IRQ_MSK		0x7fffffff
> > +
> > +/* BAM_IRQ_STTS */
> > +#define BAM_TIMER_IRQ		(0x1 << 4)
> > +#define BAM_EMPTY_IRQ		(0x1 << 3)
> > +#define BAM_ERROR_IRQ		(0x1 << 2)
> > +#define BAM_HRESP_ERR_IRQ	(0x1 << 1)
> > +
> > +/* BAM_IRQ_CLR */
> > +#define BAM_TIMER_CLR		(0x1 << 4)
> > +#define BAM_EMPTY_CLR		(0x1 << 3)
> > +#define BAM_ERROR_CLR		(0x1 << 2)
> > +#define BAM_HRESP_ERR_CLR	(0x1 << 1)
> > +
> > +/* BAM_IRQ_EN */
> > +#define BAM_TIMER_EN		(0x1 << 4)
> > +#define BAM_EMPTY_EN		(0x1 << 3)
> > +#define BAM_ERROR_EN		(0x1 << 2)
> > +#define BAM_HRESP_ERR_EN	(0x1 << 1)
> > +
> > +/* BAM_P_IRQ_EN */
> > +#define P_PRCSD_DESC_EN		(0x1 << 0)
> > +#define P_TIMER_EN		(0x1 << 1)
> > +#define P_WAKE_EN		(0x1 << 2)
> > +#define P_OUT_OF_DESC_EN	(0x1 << 3)
> > +#define P_ERR_EN		(0x1 << 4)
> > +#define P_TRNSFR_END_EN		(0x1 << 5)
> > +#define P_DEFAULT_IRQS_EN	(P_PRCSD_DESC_EN | P_ERR_EN | P_TRNSFR_END_EN)
> > +
> > +/* BAM_P_SW_OFSTS */
> > +#define P_SW_OFSTS_MASK		0xffff
> > +
> > +#define BAM_DESC_FIFO_SIZE	SZ_32K
> > +#define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1)
> > +#define BAM_MAX_DATA_SIZE	(SZ_32K - 8)
> > +
> > +struct bam_chan {
> > +	struct dma_chan common;
> > +	struct bam_device *device;
> > +	u32 id;
> > +	u32 ee;
> > +	bool idle;
> 
> Is this used?

It is orphaned, will be removed

> > +	struct bam_dma_slave_config bam_slave;	/* runtime configuration */
> > +
> > +	struct tasklet_struct tasklet;
> > +	spinlock_t lock;		/* descriptor lock */
> > +
> > +	struct list_head pending;	/* desc pending queue */
> > +	struct list_head active;	/* desc running queue */
> > +
> > +	struct bam_desc_hw *fifo_virt;
> > +	dma_addr_t fifo_phys;
> > +
> > +	/* fifo markers */
> > +	unsigned short head;		/* start of active descriptor entries */
> > +	unsigned short tail;		/* end of active descriptor entries */
> > +};
> > +
> > +static inline struct bam_chan *to_bam_chan(struct dma_chan *common)
> > +{
> > +	return container_of(common, struct bam_chan, common);
> > +}
> > +
> > +struct bam_device {
> > +	void __iomem *regs;
> > +	struct device *dev;
> > +	struct dma_device common;
> > +	struct device_dma_parameters dma_parms;
> > +	struct bam_chan *channels;
> > +	u32 num_channels;
> > +	u32 num_ees;
> > +	unsigned long enabled_ees;
> > +	u32 feature;
> 
> Is this used?

It is orphaned, will be removed
 
> > +	int irq;
> > +	struct clk *bamclk;
> > +};
> > +
> > +static inline struct bam_device *to_bam_device(struct dma_device *common)
> > +{
> > +	return container_of(common, struct bam_device, common);
> > +}
> > +
> > +#endif /* __MSM_BAM_DMA_PRIV_H__ */
Stephen Boyd Oct. 30, 2013, 8:46 p.m. UTC | #3
On 10/30/13 13:31, Andy Gross wrote:
> On Tue, Oct 29, 2013 at 10:56:03AM -0700, Stephen Boyd wrote:
>> On 10/25, Andy Gross wrote:
>>> +#include <linux/scatterlist.h>
>>> +#include <linux/device.h>
>>> +#include <linux/platform_device.h>
>>> +#include <linux/of.h>
>>> +#include <linux/of_address.h>
>>> +#include <linux/of_irq.h>
>>> +#include <linux/of_dma.h>
>>> +#include <linux/clk.h>
>>> +#include <linux/msm_bam_dma.h>
>>> +
>>> +#include "dmaengine.h"
>>> +#include "msm_bam_dma_priv.h"
>> Why do we need this file? Can't we just put the #defines in this
>> file?
>  
> There were enough definitions and structures to warrant another file.
>

Ah ok. I find it annoying to flip between two files but I guess that's
my problem.

>>> +	if (!bdev) {
>>> +		dev_err(&pdev->dev, "insufficient memory for private data\n");
>> kmalloc calls already print errors when they fail, so this can be
>> removed.
>  
> has this always been the case?

The warning in the page allocator seems to have been there since pre-git
days (see __alloc_pages_slowpath() and how it calls warn_alloc_failed())
. Other warnings in the sl*b allocators seem to have come later (see
8bdec192b40cf7f7eec170b317c76089eb5eeddb for example).
Vinod Koul Oct. 31, 2013, 4:59 p.m. UTC | #4
On Fri, Oct 25, 2013 at 03:24:02PM -0500, Andy Gross wrote:

This should be sent to dmaengine@vger.kernel.org

> Add the DMA engine driver for the MSM Bus Access Manager (BAM) DMA controller
> found in the MSM 8x74 platforms.
> 
> Each BAM DMA device is associated with a specific on-chip peripheral.  Each
> channel provides a uni-directional data transfer engine that is capable of
> transferring data between the peripheral and system memory (System mode) or
> between two peripherals (BAM2BAM).
> 
> The initial release of this driver only supports slave transfers between
> peripherals and system memory.
> 
> Signed-off-by: Andy Gross <agross@codeaurora.org>
> +/*
> + * bam_alloc_chan - Allocate channel resources for DMA channel.
> + * @chan: specified channel
> + *
> + * This function allocates the FIFO descriptor memory and resets the channel
> + */
> +static int bam_alloc_chan(struct dma_chan *chan)
> +{
> +	struct bam_chan *bchan = to_bam_chan(chan);
> +	struct bam_device *bdev = bchan->device;
> +	u32 val;
> +	union bam_pipe_ctrl pctrl;
> +
> +	/* check for channel activity */
> +	pctrl.value = ioread32(bdev->regs + BAM_P_CTRL(bchan->id));
> +	if (pctrl.bits.p_en) {
> +		dev_err(bdev->dev, "channel already active\n");
> +		return -EINVAL;
> +	}
> +
> +	/* allocate FIFO descriptor space */
> +	bchan->fifo_virt = (struct bam_desc_hw *)dma_alloc_coherent(bdev->dev,
> +				BAM_DESC_FIFO_SIZE, &bchan->fifo_phys,
> +				GFP_KERNEL);
> +
> +	if (!bchan->fifo_virt) {
> +		dev_err(bdev->dev, "Failed to allocate descriptor fifo\n");
> +		return -ENOMEM;
> +	}
> +
> +	/* reset channel */
> +	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
> +	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
> +
> +	/* configure fifo address/size in bam channel registers */
> +	iowrite32(bchan->fifo_phys, bdev->regs +
> +			BAM_P_DESC_FIFO_ADDR(bchan->id));
> +	iowrite32(BAM_DESC_FIFO_SIZE, bdev->regs +
> +			BAM_P_FIFO_SIZES(bchan->id));
> +
> +	/* unmask and enable interrupts for defined EE, bam and error irqs */
> +	iowrite32(BAM_IRQ_MSK, bdev->regs + BAM_IRQ_SRCS_EE(bchan->ee));
> +
> +	/* enable the per pipe interrupts, enable EOT and INT irqs */
> +	iowrite32(P_DEFAULT_IRQS_EN, bdev->regs + BAM_P_IRQ_EN(bchan->id));
> +
> +	/* unmask the specific pipe and EE combo */
> +	val = ioread32(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> +	val |= 1 << bchan->id;
> +	iowrite32(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> +
> +	/* set fixed direction and mode, then enable channel */
I was going to question why you are doing hw specfic stuff in alloc channel but
now why do you enable seems to be a bigger question in mind?
> +	pctrl.value = 0;
> +	pctrl.bits.p_direction =
> +		(bchan->bam_slave.slave.direction == DMA_DEV_TO_MEM) ?
> +			BAM_PIPE_PRODUCER : BAM_PIPE_CONSUMER;
> +	pctrl.bits.p_sys_mode = BAM_PIPE_MODE_SYSTEM;
> +	pctrl.bits.p_en = 1;
> +
> +	iowrite32(pctrl.value, bdev->regs + BAM_P_CTRL(bchan->id));
> +
> +	/* set desc threshold */
> +	/* do bookkeeping for tracking used EEs, used during IRQ handling */
> +	set_bit(bchan->ee, &bdev->enabled_ees);
> +
> +	bchan->head = 0;
> +	bchan->tail = 0;
> +
> +	return 0;
You said you are going to allocate descriptors so right thing would be return
number of allocated desc here!
> +}
> +
> +/*
> + * bam_free_chan - Frees dma resources associated with specific channel
> + * @chan: specified channel
> + *
> + * Free the allocated fifo descriptor memory and channel resources
> + *
> + */
> +static void bam_free_chan(struct dma_chan *chan)
> +{
> +	struct bam_chan *bchan = to_bam_chan(chan);
> +	struct bam_device *bdev = bchan->device;
> +	u32 val;
> +
Shouldn't you check if channel is busy?

> +	/* reset channel */
> +	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
> +	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
> +
> +	dma_free_coherent(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
> +				bchan->fifo_phys);
> +
> +	/* mask irq for pipe/channel */
> +	val = ioread32(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> +	val &= ~(1 << bchan->id);
> +	iowrite32(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> +
> +	/* disable irq */
> +	iowrite32(0, bdev->regs + BAM_P_IRQ_EN(bchan->id));
> +
> +	clear_bit(bchan->ee, &bdev->enabled_ees);
> +}
> +
> +/*
> + * bam_slave_config - set slave configuration for channel
> + * @chan: dma channel
> + * @cfg: slave configuration
> + *
> + * Sets slave configuration for channel
> + * Only allow setting direction once.  BAM channels are unidirectional
> + * and the direction is set in hardware.
> + *
> + */
> +static void bam_slave_config(struct bam_chan *bchan,
> +		struct bam_dma_slave_config *bcfg)

> +{
> +	struct bam_device *bdev = bchan->device;
> +
> +	bchan->bam_slave.desc_threshold = bcfg->desc_threshold;
what does the desc_threshold mean?

> +
> +	/* set desc threshold */
> +	iowrite32(bcfg->desc_threshold, bdev->regs + BAM_DESC_CNT_TRSHLD);
> +}
> +
> +/*
> + * bam_start_dma - loads up descriptors and starts dma
> + * @chan: dma channel
> + *
> + * Loads descriptors into descriptor fifo and starts dma controller
> + *
> + * NOTE: Must hold channel lock
> +*/
> +static void bam_start_dma(struct bam_chan *bchan)
> +{
> +	struct bam_device *bdev = bchan->device;
> +	struct bam_async_desc *async_desc, *_adesc;
> +	u32 curr_len, val;
> +	u32 num_processed = 0;
> +
> +	if (list_empty(&bchan->pending))
> +		return;
> +
> +	curr_len = (bchan->head <= bchan->tail) ?
> +			bchan->tail - bchan->head :
> +			MAX_DESCRIPTORS - bchan->head + bchan->tail;
> +
> +	list_for_each_entry_safe(async_desc, _adesc, &bchan->pending, node) {
> +
> +		/* bust out if we are out of room */
> +		if (async_desc->num_desc + curr_len > MAX_DESCRIPTORS)
> +			break;
> +
> +		/* copy descriptors into fifo */
> +		if (bchan->tail + async_desc->num_desc > MAX_DESCRIPTORS) {
> +			u32 partial = MAX_DESCRIPTORS - bchan->tail;
> +
> +			memcpy(&bchan->fifo_virt[bchan->tail], async_desc->desc,
> +				partial * sizeof(struct bam_desc_hw));
> +			memcpy(bchan->fifo_virt, &async_desc->desc[partial],
> +				(async_desc->num_desc - partial) *
> +					sizeof(struct bam_desc_hw));
memcpy for device memory copy?
> +		} else {
> +			memcpy(&bchan->fifo_virt[bchan->tail], async_desc->desc,
> +				async_desc->num_desc *
> +				sizeof(struct bam_desc_hw));
> +		}
> +
> +		num_processed++;
> +		bchan->tail += async_desc->num_desc;
> +		bchan->tail %= MAX_DESCRIPTORS;
> +		curr_len += async_desc->num_desc;
> +
> +		list_move_tail(&async_desc->node, &bchan->active);
> +	}
> +
> +	/* bail if we didn't queue anything to the active queue */
> +	if (!num_processed)
> +		return;
> +
> +	async_desc = list_first_entry(&bchan->active, struct bam_async_desc,
> +			node);
> +
> +	val = ioread32(bdev->regs + BAM_P_SW_OFSTS(bchan->id));
> +	val &= P_SW_OFSTS_MASK;
> +
> +	/* kick off dma by forcing a write event to the pipe */
> +	iowrite32((bchan->tail * sizeof(struct bam_desc_hw)),
> +			bdev->regs + BAM_P_EVNT_REG(bchan->id));
> +}
> +
> +/*
> + * bam_tx_submit - Adds transaction to channel pending queue
> + * @tx: transaction to queue
> + *
> + * Adds dma transaction to pending queue for channel
> + *
> +*/
> +static dma_cookie_t bam_tx_submit(struct dma_async_tx_descriptor *tx)
> +{
> +	struct bam_chan *bchan = to_bam_chan(tx->chan);
> +	struct bam_async_desc *desc = to_bam_async_desc(tx);
> +	dma_cookie_t cookie;
> +
> +	spin_lock_bh(&bchan->lock);
> +
> +	cookie = dma_cookie_assign(tx);
> +	list_add_tail(&desc->node, &bchan->pending);
> +
> +	spin_unlock_bh(&bchan->lock);
> +
> +	return cookie;
> +}
Okay you are *NOT* using virt-dma layer, all this can be removed if you do that,
this is similar to what vchan_tx_submit() does!

> +
> +/*
> + * bam_prep_slave_sg - Prep slave sg transaction
> + *
> + * @chan: dma channel
> + * @sgl: scatter gather list
> + * @sg_len: length of sg
> + * @direction: DMA transfer direction
> + * @flags: DMA flags
> + * @context: transfer context (unused)
> + */
> +static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan,
> +	struct scatterlist *sgl, unsigned int sg_len,
> +	enum dma_transfer_direction direction, unsigned long flags,
> +	void *context)
> +{
> +	struct bam_chan *bchan = to_bam_chan(chan);
> +	struct bam_device *bdev = bchan->device;
> +	struct bam_async_desc *async_desc = NULL;
> +	struct scatterlist *sg;
> +	u32 i;
> +	struct bam_desc_hw *desc;
> +
> +
> +	if (!is_slave_direction(direction)) {
> +		dev_err(bdev->dev, "invalid dma direction\n");
> +		goto err_out;
> +	}
> +
> +	/* direction has to match pipe configuration from the slave config */
> +	if (direction != bchan->bam_slave.slave.direction) {
> +		dev_err(bdev->dev,
> +				"trans does not match channel configuration\n");
> +		goto err_out;
> +	}
> +
> +	/* make sure number of descriptors will fit within the fifo */
> +	if (sg_len > MAX_DESCRIPTORS) {
> +		dev_err(bdev->dev, "not enough fifo descriptor space\n");
> +		goto err_out;
> +	}
what prevents you from assigning more virtual descriptors to this case and then
submit those after HW descriptors are done.
> +
> +	/* allocate enough room to accomodate the number of entries */
> +	async_desc = kzalloc(sizeof(*async_desc) +
> +			(sg_len * sizeof(struct bam_desc_hw)), GFP_KERNEL);
this cna be called from non sleepy context and the recommedation is to use
GFP_NOWAIT for memory allocation

> +static int bam_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
> +	unsigned long arg)
> +{
> +	struct bam_chan *bchan = to_bam_chan(chan);
> +	struct bam_device *bdev = bchan->device;
> +	struct bam_dma_slave_config *bconfig;
> +	int ret = 0;
> +
> +	switch (cmd) {
> +	case DMA_PAUSE:
> +		spin_lock_bh(&bchan->lock);
> +		iowrite32(1, bdev->regs + BAM_P_HALT(bchan->id));
> +		spin_unlock_bh(&bchan->lock);
> +		break;
> +	case DMA_RESUME:
> +		spin_lock_bh(&bchan->lock);
> +		iowrite32(0, bdev->regs + BAM_P_HALT(bchan->id));
> +		spin_unlock_bh(&bchan->lock);
> +		break;
> +	case DMA_TERMINATE_ALL:
> +		bam_dma_terminate_all(chan);
> +		break;
> +	case DMA_SLAVE_CONFIG:
> +		bconfig = (struct bam_dma_slave_config *)arg;
> +		bam_slave_config(bchan, bconfig);
DMA_SLAVE_CONFIG expects arg as struct dma_slave_config, not this. Pls dont
voilate APIs
> +		break;
> +	default:
> +		ret = -EIO;
I would expect -ENXIO here!

> +		break;
> +	}
> +
> +	return ret;
> +}
> +
> +/*
> + * bam_tx_status - returns status of transaction
> + * @chan: dma channel
> + * @cookie: transaction cookie
> + * @txstate: DMA transaction state
> + *
> + * Return status of dma transaction
> + */
> +static enum dma_status bam_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
> +		struct dma_tx_state *txstate)
> +{
> +	return dma_cookie_status(chan, cookie, txstate);
hmmm, this wont work in many cases for slave. For example if you try to get this
working with audio you need to provide the residue values.
The results you are providing here will not be useful, so I would recommedn
fixing it

> +
> +static int bam_dma_probe(struct platform_device *pdev)
> +{
> +	struct bam_device *bdev;
> +	int err, i;
> +
> +	bdev = kzalloc(sizeof(*bdev), GFP_KERNEL);
devm_ pls

> +	if (!bdev) {
> +		dev_err(&pdev->dev, "insufficient memory for private data\n");
> +		err = -ENOMEM;
> +		goto err_no_bdev;
> +	}
> +
> +	bdev->dev = &pdev->dev;
> +	dev_set_drvdata(bdev->dev, bdev);
> +
> +	bdev->regs = of_iomap(pdev->dev.of_node, 0);
> +	if (!bdev->regs) {
> +		dev_err(bdev->dev, "unable to ioremap base\n");
> +		err = -ENOMEM;
> +		goto err_free_bamdev;
> +	}
> +
> +	bdev->irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
> +	if (bdev->irq == NO_IRQ) {
> +		dev_err(bdev->dev, "unable to map irq\n");
> +		err = -EINVAL;
> +		goto err_unmap_mem;
> +	}
> +
> +	bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk");
> +	if (IS_ERR(bdev->bamclk)) {
> +		err = PTR_ERR(bdev->bamclk);
> +		goto err_free_irq;
> +	}
> +
> +	err = clk_prepare_enable(bdev->bamclk);
> +	if (err) {
> +		dev_err(bdev->dev, "failed to prepare/enable clock");
> +		goto err_free_irq;
> +	}
> +
> +	err = request_irq(bdev->irq, &bam_dma_irq, IRQF_TRIGGER_HIGH, "bam_dma",
> +				bdev);
> +	if (err) {
> +		dev_err(bdev->dev, "error requesting irq\n");
> +		err = -EINVAL;
> +		goto err_disable_clk;
> +	}
> +
> +	if (bam_init(bdev)) {
> +		dev_err(bdev->dev, "cannot initialize bam device\n");
> +		err = -EINVAL;
> +		goto err_disable_clk;
> +	}
> +
> +	bdev->channels = kzalloc(sizeof(*bdev->channels) * bdev->num_channels,
> +				GFP_KERNEL);
> +
> +	if (!bdev->channels) {
> +		dev_err(bdev->dev, "unable to allocate channels\n");
> +		err = -ENOMEM;
> +		goto err_disable_clk;
> +	}
> +
> +	/* allocate and initialize channels */
> +	INIT_LIST_HEAD(&bdev->common.channels);
> +
> +	for (i = 0; i < bdev->num_channels; i++)
> +		bam_channel_init(bdev, &bdev->channels[i], i);
> +
> +	/* set max dma segment size */
> +	bdev->common.dev = bdev->dev;
> +	bdev->common.dev->dma_parms = &bdev->dma_parms;
> +	if (dma_set_max_seg_size(bdev->common.dev, BAM_MAX_DATA_SIZE)) {
> +		dev_err(bdev->dev, "cannot set maximum segment size\n");
> +		goto err_disable_clk;
> +	}
> +
> +	/* set capabilities */
> +	dma_cap_zero(bdev->common.cap_mask);
> +	dma_cap_set(DMA_SLAVE, bdev->common.cap_mask);
> +
> +	/* initialize dmaengine apis */
> +	bdev->common.device_alloc_chan_resources = bam_alloc_chan;
> +	bdev->common.device_free_chan_resources = bam_free_chan;
> +	bdev->common.device_prep_slave_sg = bam_prep_slave_sg;
> +	bdev->common.device_control = bam_control;
> +	bdev->common.device_issue_pending = bam_issue_pending;
> +	bdev->common.device_tx_status = bam_tx_status;
> +	bdev->common.dev = bdev->dev;
> +
> +	dma_async_device_register(&bdev->common);
> +
> +	if (pdev->dev.of_node) {
> +		err = of_dma_controller_register(pdev->dev.of_node,
> +				bam_dma_xlate, &bdev->common);
> +
> +		if (err) {
> +			dev_err(bdev->dev, "failed to register of_dma\n");
> +			goto err_unregister_dma;
> +		}
> +	}
> +
> +	return 0;
> +
> +err_unregister_dma:
> +	dma_async_device_unregister(&bdev->common);
> +err_free_irq:
> +	free_irq(bdev->irq, bdev);
> +err_disable_clk:
> +	clk_disable_unprepare(bdev->bamclk);
> +err_unmap_mem:
> +	iounmap(bdev->regs);
> +err_free_bamdev:
> +	if (bdev)
> +		kfree(bdev->channels);
> +	kfree(bdev);
> +err_no_bdev:
you need to get yourslef introduced with devm_ friends to ease this part!

Overall I think driver needs to bit more plumbing and also needs to use the
virt-dma so that bunch of work already done is not redefined here.

--
~Vinod
Andy Gross Oct. 31, 2013, 9:46 p.m. UTC | #5
On Thu, Oct 31, 2013 at 10:29:48PM +0530, Vinod Koul wrote:
> On Fri, Oct 25, 2013 at 03:24:02PM -0500, Andy Gross wrote:
> 
> This should be sent to dmaengine@vger.kernel.org
 
I'll add the list when I send the second iteration or should I send it over mid
stream?

> > Add the DMA engine driver for the MSM Bus Access Manager (BAM) DMA controller
> > found in the MSM 8x74 platforms.
> > 
> > Each BAM DMA device is associated with a specific on-chip peripheral.  Each
> > channel provides a uni-directional data transfer engine that is capable of
> > transferring data between the peripheral and system memory (System mode) or
> > between two peripherals (BAM2BAM).
> > 
> > The initial release of this driver only supports slave transfers between
> > peripherals and system memory.
> > 
> > Signed-off-by: Andy Gross <agross@codeaurora.org>
> > +/*
> > + * bam_alloc_chan - Allocate channel resources for DMA channel.
> > + * @chan: specified channel
> > + *
> > + * This function allocates the FIFO descriptor memory and resets the channel
> > + */
> > +static int bam_alloc_chan(struct dma_chan *chan)
> > +{
> > +	struct bam_chan *bchan = to_bam_chan(chan);
> > +	struct bam_device *bdev = bchan->device;
> > +	u32 val;
> > +	union bam_pipe_ctrl pctrl;
> > +
> > +	/* check for channel activity */
> > +	pctrl.value = ioread32(bdev->regs + BAM_P_CTRL(bchan->id));
> > +	if (pctrl.bits.p_en) {
> > +		dev_err(bdev->dev, "channel already active\n");
> > +		return -EINVAL;
> > +	}
> > +
> > +	/* allocate FIFO descriptor space */
> > +	bchan->fifo_virt = (struct bam_desc_hw *)dma_alloc_coherent(bdev->dev,
> > +				BAM_DESC_FIFO_SIZE, &bchan->fifo_phys,
> > +				GFP_KERNEL);
> > +
> > +	if (!bchan->fifo_virt) {
> > +		dev_err(bdev->dev, "Failed to allocate descriptor fifo\n");
> > +		return -ENOMEM;
> > +	}
> > +
> > +	/* reset channel */
> > +	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
> > +	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
> > +
> > +	/* configure fifo address/size in bam channel registers */
> > +	iowrite32(bchan->fifo_phys, bdev->regs +
> > +			BAM_P_DESC_FIFO_ADDR(bchan->id));
> > +	iowrite32(BAM_DESC_FIFO_SIZE, bdev->regs +
> > +			BAM_P_FIFO_SIZES(bchan->id));
> > +
> > +	/* unmask and enable interrupts for defined EE, bam and error irqs */
> > +	iowrite32(BAM_IRQ_MSK, bdev->regs + BAM_IRQ_SRCS_EE(bchan->ee));
> > +
> > +	/* enable the per pipe interrupts, enable EOT and INT irqs */
> > +	iowrite32(P_DEFAULT_IRQS_EN, bdev->regs + BAM_P_IRQ_EN(bchan->id));
> > +
> > +	/* unmask the specific pipe and EE combo */
> > +	val = ioread32(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> > +	val |= 1 << bchan->id;
> > +	iowrite32(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> > +
> > +	/* set fixed direction and mode, then enable channel */
> I was going to question why you are doing hw specfic stuff in alloc channel but
> now why do you enable seems to be a bigger question in mind?

The fifo_virt is used to store the hardware descriptors that are used directly
by the dma controller.  I have to at least fill in the descriptor FIFO address
and size and reset the channel to clear the fifo offset inside the hardware.
After reset the internal fifo offset is 0.  And every subsequent transaction
increments this.  That is how it knows which descriptors to work on inside the
descriptor fifo memory.

I can definitely defer the rest of hte h/w interactions until the point that I
need to actually kick off the dma controller.


> > +	pctrl.value = 0;
> > +	pctrl.bits.p_direction =
> > +		(bchan->bam_slave.slave.direction == DMA_DEV_TO_MEM) ?
> > +			BAM_PIPE_PRODUCER : BAM_PIPE_CONSUMER;
> > +	pctrl.bits.p_sys_mode = BAM_PIPE_MODE_SYSTEM;
> > +	pctrl.bits.p_en = 1;
> > +
> > +	iowrite32(pctrl.value, bdev->regs + BAM_P_CTRL(bchan->id));
> > +
> > +	/* set desc threshold */
> > +	/* do bookkeeping for tracking used EEs, used during IRQ handling */
> > +	set_bit(bchan->ee, &bdev->enabled_ees);
> > +
> > +	bchan->head = 0;
> > +	bchan->tail = 0;
> > +
> > +	return 0;
> You said you are going to allocate descriptors so right thing would be return
> number of allocated desc here!

OK, I missed that.

> > +}
> > +
> > +/*
> > + * bam_free_chan - Frees dma resources associated with specific channel
> > + * @chan: specified channel
> > + *
> > + * Free the allocated fifo descriptor memory and channel resources
> > + *
> > + */
> > +static void bam_free_chan(struct dma_chan *chan)
> > +{
> > +	struct bam_chan *bchan = to_bam_chan(chan);
> > +	struct bam_device *bdev = bchan->device;
> > +	u32 val;
> > +
> Shouldn't you check if channel is busy?
> 

Yes, I'll add that in.  With no return code, how useful is this to the caller?


> > +	/* reset channel */
> > +	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
> > +	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
> > +
> > +	dma_free_coherent(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
> > +				bchan->fifo_phys);
> > +
> > +	/* mask irq for pipe/channel */
> > +	val = ioread32(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> > +	val &= ~(1 << bchan->id);
> > +	iowrite32(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> > +
> > +	/* disable irq */
> > +	iowrite32(0, bdev->regs + BAM_P_IRQ_EN(bchan->id));
> > +
> > +	clear_bit(bchan->ee, &bdev->enabled_ees);
> > +}
> > +
> > +/*
> > + * bam_slave_config - set slave configuration for channel
> > + * @chan: dma channel
> > + * @cfg: slave configuration
> > + *
> > + * Sets slave configuration for channel
> > + * Only allow setting direction once.  BAM channels are unidirectional
> > + * and the direction is set in hardware.
> > + *
> > + */
> > +static void bam_slave_config(struct bam_chan *bchan,
> > +		struct bam_dma_slave_config *bcfg)
> 
> > +{
> > +	struct bam_device *bdev = bchan->device;
> > +
> > +	bchan->bam_slave.desc_threshold = bcfg->desc_threshold;
> what does the desc_threshold mean?
 
The desc threshhold determines the minimum number of bytes of descriptor that
causes a write event to be communicated to the peripheral.  I default to 4 bytes
(1 descriptor), but this is configurable through the DMA_SLAVE_CONFIG interface
using the extended slave_config structure.

> > +
> > +	/* set desc threshold */
> > +	iowrite32(bcfg->desc_threshold, bdev->regs + BAM_DESC_CNT_TRSHLD);
> > +}
> > +
> > +/*
> > + * bam_start_dma - loads up descriptors and starts dma
> > + * @chan: dma channel
> > + *
> > + * Loads descriptors into descriptor fifo and starts dma controller
> > + *
> > + * NOTE: Must hold channel lock
> > +*/
> > +static void bam_start_dma(struct bam_chan *bchan)
> > +{
> > +	struct bam_device *bdev = bchan->device;
> > +	struct bam_async_desc *async_desc, *_adesc;
> > +	u32 curr_len, val;
> > +	u32 num_processed = 0;
> > +
> > +	if (list_empty(&bchan->pending))
> > +		return;
> > +
> > +	curr_len = (bchan->head <= bchan->tail) ?
> > +			bchan->tail - bchan->head :
> > +			MAX_DESCRIPTORS - bchan->head + bchan->tail;
> > +
> > +	list_for_each_entry_safe(async_desc, _adesc, &bchan->pending, node) {
> > +
> > +		/* bust out if we are out of room */
> > +		if (async_desc->num_desc + curr_len > MAX_DESCRIPTORS)
> > +			break;
> > +
> > +		/* copy descriptors into fifo */
> > +		if (bchan->tail + async_desc->num_desc > MAX_DESCRIPTORS) {
> > +			u32 partial = MAX_DESCRIPTORS - bchan->tail;
> > +
> > +			memcpy(&bchan->fifo_virt[bchan->tail], async_desc->desc,
> > +				partial * sizeof(struct bam_desc_hw));
> > +			memcpy(bchan->fifo_virt, &async_desc->desc[partial],
> > +				(async_desc->num_desc - partial) *
> > +					sizeof(struct bam_desc_hw));
> memcpy for device memory copy?

My initial thought was that I needed to wait until now to fill in the
descriptors on the fifo that was allocated.  The fifo memory is accessed from
the dma controller.  The controller just manages an internal offset that rolls
over based on the size of the configured fifo memory.  I was keeping the
descriptors on hand until I needed to program them into the fifo memory, hence
the copy.

> > +		} else {
> > +			memcpy(&bchan->fifo_virt[bchan->tail], async_desc->desc,
> > +				async_desc->num_desc *
> > +				sizeof(struct bam_desc_hw));
> > +		}
> > +
> > +		num_processed++;
> > +		bchan->tail += async_desc->num_desc;
> > +		bchan->tail %= MAX_DESCRIPTORS;
> > +		curr_len += async_desc->num_desc;
> > +
> > +		list_move_tail(&async_desc->node, &bchan->active);
> > +	}
> > +
> > +	/* bail if we didn't queue anything to the active queue */
> > +	if (!num_processed)
> > +		return;
> > +
> > +	async_desc = list_first_entry(&bchan->active, struct bam_async_desc,
> > +			node);
> > +
> > +	val = ioread32(bdev->regs + BAM_P_SW_OFSTS(bchan->id));
> > +	val &= P_SW_OFSTS_MASK;
> > +
> > +	/* kick off dma by forcing a write event to the pipe */
> > +	iowrite32((bchan->tail * sizeof(struct bam_desc_hw)),
> > +			bdev->regs + BAM_P_EVNT_REG(bchan->id));
> > +}
> > +
> > +/*
> > + * bam_tx_submit - Adds transaction to channel pending queue
> > + * @tx: transaction to queue
> > + *
> > + * Adds dma transaction to pending queue for channel
> > + *
> > +*/
> > +static dma_cookie_t bam_tx_submit(struct dma_async_tx_descriptor *tx)
> > +{
> > +	struct bam_chan *bchan = to_bam_chan(tx->chan);
> > +	struct bam_async_desc *desc = to_bam_async_desc(tx);
> > +	dma_cookie_t cookie;
> > +
> > +	spin_lock_bh(&bchan->lock);
> > +
> > +	cookie = dma_cookie_assign(tx);
> > +	list_add_tail(&desc->node, &bchan->pending);
> > +
> > +	spin_unlock_bh(&bchan->lock);
> > +
> > +	return cookie;
> > +}
> Okay you are *NOT* using virt-dma layer, all this can be removed if you do that,
> this is similar to what vchan_tx_submit() does!
>

I'll look into reworking and utilizing the virt-dma layer.
 
> > +
> > +/*
> > + * bam_prep_slave_sg - Prep slave sg transaction
> > + *
> > + * @chan: dma channel
> > + * @sgl: scatter gather list
> > + * @sg_len: length of sg
> > + * @direction: DMA transfer direction
> > + * @flags: DMA flags
> > + * @context: transfer context (unused)
> > + */
> > +static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan,
> > +	struct scatterlist *sgl, unsigned int sg_len,
> > +	enum dma_transfer_direction direction, unsigned long flags,
> > +	void *context)
> > +{
> > +	struct bam_chan *bchan = to_bam_chan(chan);
> > +	struct bam_device *bdev = bchan->device;
> > +	struct bam_async_desc *async_desc = NULL;
> > +	struct scatterlist *sg;
> > +	u32 i;
> > +	struct bam_desc_hw *desc;
> > +
> > +
> > +	if (!is_slave_direction(direction)) {
> > +		dev_err(bdev->dev, "invalid dma direction\n");
> > +		goto err_out;
> > +	}
> > +
> > +	/* direction has to match pipe configuration from the slave config */
> > +	if (direction != bchan->bam_slave.slave.direction) {
> > +		dev_err(bdev->dev,
> > +				"trans does not match channel configuration\n");
> > +		goto err_out;
> > +	}
> > +
> > +	/* make sure number of descriptors will fit within the fifo */
> > +	if (sg_len > MAX_DESCRIPTORS) {
> > +		dev_err(bdev->dev, "not enough fifo descriptor space\n");
> > +		goto err_out;
> > +	}
> what prevents you from assigning more virtual descriptors to this case and then
> submit those after HW descriptors are done.

I hadn't considered the virtual descriptors.  I'll see what I can do to utilize
that and rework this.  I can see the virtual descriptors simplifying this a good
bit.

> > +
> > +	/* allocate enough room to accomodate the number of entries */
> > +	async_desc = kzalloc(sizeof(*async_desc) +
> > +			(sg_len * sizeof(struct bam_desc_hw)), GFP_KERNEL);
> this cna be called from non sleepy context and the recommedation is to use
> GFP_NOWAIT for memory allocation
> 

I missed this.  I'll change it.

> > +static int bam_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
> > +	unsigned long arg)
> > +{
> > +	struct bam_chan *bchan = to_bam_chan(chan);
> > +	struct bam_device *bdev = bchan->device;
> > +	struct bam_dma_slave_config *bconfig;
> > +	int ret = 0;
> > +
> > +	switch (cmd) {
> > +	case DMA_PAUSE:
> > +		spin_lock_bh(&bchan->lock);
> > +		iowrite32(1, bdev->regs + BAM_P_HALT(bchan->id));
> > +		spin_unlock_bh(&bchan->lock);
> > +		break;
> > +	case DMA_RESUME:
> > +		spin_lock_bh(&bchan->lock);
> > +		iowrite32(0, bdev->regs + BAM_P_HALT(bchan->id));
> > +		spin_unlock_bh(&bchan->lock);
> > +		break;
> > +	case DMA_TERMINATE_ALL:
> > +		bam_dma_terminate_all(chan);
> > +		break;
> > +	case DMA_SLAVE_CONFIG:
> > +		bconfig = (struct bam_dma_slave_config *)arg;
> > +		bam_slave_config(bchan, bconfig);
> DMA_SLAVE_CONFIG expects arg as struct dma_slave_config, not this. Pls dont
> voilate APIs

So for extended slave_config structures, the caller needs to send in a ptr to
the dma_slave_config and then I can determine the bam_dma_slave_config.  Will
fix.

> > +		break;
> > +	default:
> > +		ret = -EIO;
> I would expect -ENXIO here!
> 

OK.

> > +		break;
> > +	}
> > +
> > +	return ret;
> > +}
> > +
> > +/*
> > + * bam_tx_status - returns status of transaction
> > + * @chan: dma channel
> > + * @cookie: transaction cookie
> > + * @txstate: DMA transaction state
> > + *
> > + * Return status of dma transaction
> > + */
> > +static enum dma_status bam_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
> > +		struct dma_tx_state *txstate)
> > +{
> > +	return dma_cookie_status(chan, cookie, txstate);
> hmmm, this wont work in many cases for slave. For example if you try to get this
> working with audio you need to provide the residue values.
> The results you are providing here will not be useful, so I would recommedn
> fixing it
> 

Ok so in this case I'd need to read where I am in the descriptor chain and
calculate the residual.  That shouldn't be a problem.

> > +
> > +static int bam_dma_probe(struct platform_device *pdev)
> > +{
> > +	struct bam_device *bdev;
> > +	int err, i;
> > +
> > +	bdev = kzalloc(sizeof(*bdev), GFP_KERNEL);
> devm_ pls
>

will fix.

> > +	if (!bdev) {
> > +		dev_err(&pdev->dev, "insufficient memory for private data\n");
> > +		err = -ENOMEM;
> > +		goto err_no_bdev;
> > +	}
> > +
> > +	bdev->dev = &pdev->dev;
> > +	dev_set_drvdata(bdev->dev, bdev);
> > +
> > +	bdev->regs = of_iomap(pdev->dev.of_node, 0);
> > +	if (!bdev->regs) {
> > +		dev_err(bdev->dev, "unable to ioremap base\n");
> > +		err = -ENOMEM;
> > +		goto err_free_bamdev;
> > +	}
> > +
> > +	bdev->irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
> > +	if (bdev->irq == NO_IRQ) {
> > +		dev_err(bdev->dev, "unable to map irq\n");
> > +		err = -EINVAL;
> > +		goto err_unmap_mem;
> > +	}
> > +
> > +	bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk");
> > +	if (IS_ERR(bdev->bamclk)) {
> > +		err = PTR_ERR(bdev->bamclk);
> > +		goto err_free_irq;
> > +	}
> > +
> > +	err = clk_prepare_enable(bdev->bamclk);
> > +	if (err) {
> > +		dev_err(bdev->dev, "failed to prepare/enable clock");
> > +		goto err_free_irq;
> > +	}
> > +
> > +	err = request_irq(bdev->irq, &bam_dma_irq, IRQF_TRIGGER_HIGH, "bam_dma",
> > +				bdev);
> > +	if (err) {
> > +		dev_err(bdev->dev, "error requesting irq\n");
> > +		err = -EINVAL;
> > +		goto err_disable_clk;
> > +	}
> > +
> > +	if (bam_init(bdev)) {
> > +		dev_err(bdev->dev, "cannot initialize bam device\n");
> > +		err = -EINVAL;
> > +		goto err_disable_clk;
> > +	}
> > +
> > +	bdev->channels = kzalloc(sizeof(*bdev->channels) * bdev->num_channels,
> > +				GFP_KERNEL);
> > +
> > +	if (!bdev->channels) {
> > +		dev_err(bdev->dev, "unable to allocate channels\n");
> > +		err = -ENOMEM;
> > +		goto err_disable_clk;
> > +	}
> > +
> > +	/* allocate and initialize channels */
> > +	INIT_LIST_HEAD(&bdev->common.channels);
> > +
> > +	for (i = 0; i < bdev->num_channels; i++)
> > +		bam_channel_init(bdev, &bdev->channels[i], i);
> > +
> > +	/* set max dma segment size */
> > +	bdev->common.dev = bdev->dev;
> > +	bdev->common.dev->dma_parms = &bdev->dma_parms;
> > +	if (dma_set_max_seg_size(bdev->common.dev, BAM_MAX_DATA_SIZE)) {
> > +		dev_err(bdev->dev, "cannot set maximum segment size\n");
> > +		goto err_disable_clk;
> > +	}
> > +
> > +	/* set capabilities */
> > +	dma_cap_zero(bdev->common.cap_mask);
> > +	dma_cap_set(DMA_SLAVE, bdev->common.cap_mask);
> > +
> > +	/* initialize dmaengine apis */
> > +	bdev->common.device_alloc_chan_resources = bam_alloc_chan;
> > +	bdev->common.device_free_chan_resources = bam_free_chan;
> > +	bdev->common.device_prep_slave_sg = bam_prep_slave_sg;
> > +	bdev->common.device_control = bam_control;
> > +	bdev->common.device_issue_pending = bam_issue_pending;
> > +	bdev->common.device_tx_status = bam_tx_status;
> > +	bdev->common.dev = bdev->dev;
> > +
> > +	dma_async_device_register(&bdev->common);
> > +
> > +	if (pdev->dev.of_node) {
> > +		err = of_dma_controller_register(pdev->dev.of_node,
> > +				bam_dma_xlate, &bdev->common);
> > +
> > +		if (err) {
> > +			dev_err(bdev->dev, "failed to register of_dma\n");
> > +			goto err_unregister_dma;
> > +		}
> > +	}
> > +
> > +	return 0;
> > +
> > +err_unregister_dma:
> > +	dma_async_device_unregister(&bdev->common);
> > +err_free_irq:
> > +	free_irq(bdev->irq, bdev);
> > +err_disable_clk:
> > +	clk_disable_unprepare(bdev->bamclk);
> > +err_unmap_mem:
> > +	iounmap(bdev->regs);
> > +err_free_bamdev:
> > +	if (bdev)
> > +		kfree(bdev->channels);
> > +	kfree(bdev);
> > +err_no_bdev:
> you need to get yourslef introduced with devm_ friends to ease this part!
> 
> Overall I think driver needs to bit more plumbing and also needs to use the
> virt-dma so that bunch of work already done is not redefined here.

I'll rework for comments and see how I can incorporate the virt-dma.
Andy Gross Nov. 7, 2013, 11:03 p.m. UTC | #6
On Thu, Oct 31, 2013 at 04:46:21PM -0500, Andy Gross wrote:
> On Thu, Oct 31, 2013 at 10:29:48PM +0530, Vinod Koul wrote:
> > On Fri, Oct 25, 2013 at 03:24:02PM -0500, Andy Gross wrote:
> > 
> > This should be sent to dmaengine@vger.kernel.org
>  
> I'll add the list when I send the second iteration or should I send it over mid
> stream?
> 
> > > Add the DMA engine driver for the MSM Bus Access Manager (BAM) DMA controller
> > > found in the MSM 8x74 platforms.
> > > 
> > > Each BAM DMA device is associated with a specific on-chip peripheral.  Each
> > > channel provides a uni-directional data transfer engine that is capable of
> > > transferring data between the peripheral and system memory (System mode) or
> > > between two peripherals (BAM2BAM).
> > > 
> > > The initial release of this driver only supports slave transfers between
> > > peripherals and system memory.
> > > 
> > > Signed-off-by: Andy Gross <agross@codeaurora.org>
> > > +/*
> > > + * bam_alloc_chan - Allocate channel resources for DMA channel.
> > > + * @chan: specified channel
> > > + *
> > > + * This function allocates the FIFO descriptor memory and resets the channel
> > > + */
> > > +static int bam_alloc_chan(struct dma_chan *chan)
> > > +{
> > > +	struct bam_chan *bchan = to_bam_chan(chan);
> > > +	struct bam_device *bdev = bchan->device;
> > > +	u32 val;
> > > +	union bam_pipe_ctrl pctrl;
> > > +
> > > +	/* check for channel activity */
> > > +	pctrl.value = ioread32(bdev->regs + BAM_P_CTRL(bchan->id));
> > > +	if (pctrl.bits.p_en) {
> > > +		dev_err(bdev->dev, "channel already active\n");
> > > +		return -EINVAL;
> > > +	}
> > > +
> > > +	/* allocate FIFO descriptor space */
> > > +	bchan->fifo_virt = (struct bam_desc_hw *)dma_alloc_coherent(bdev->dev,
> > > +				BAM_DESC_FIFO_SIZE, &bchan->fifo_phys,
> > > +				GFP_KERNEL);
> > > +
> > > +	if (!bchan->fifo_virt) {
> > > +		dev_err(bdev->dev, "Failed to allocate descriptor fifo\n");
> > > +		return -ENOMEM;
> > > +	}
> > > +
> > > +	/* reset channel */
> > > +	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
> > > +	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
> > > +
> > > +	/* configure fifo address/size in bam channel registers */
> > > +	iowrite32(bchan->fifo_phys, bdev->regs +
> > > +			BAM_P_DESC_FIFO_ADDR(bchan->id));
> > > +	iowrite32(BAM_DESC_FIFO_SIZE, bdev->regs +
> > > +			BAM_P_FIFO_SIZES(bchan->id));
> > > +
> > > +	/* unmask and enable interrupts for defined EE, bam and error irqs */
> > > +	iowrite32(BAM_IRQ_MSK, bdev->regs + BAM_IRQ_SRCS_EE(bchan->ee));
> > > +
> > > +	/* enable the per pipe interrupts, enable EOT and INT irqs */
> > > +	iowrite32(P_DEFAULT_IRQS_EN, bdev->regs + BAM_P_IRQ_EN(bchan->id));
> > > +
> > > +	/* unmask the specific pipe and EE combo */
> > > +	val = ioread32(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> > > +	val |= 1 << bchan->id;
> > > +	iowrite32(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> > > +
> > > +	/* set fixed direction and mode, then enable channel */
> > I was going to question why you are doing hw specfic stuff in alloc channel but
> > now why do you enable seems to be a bigger question in mind?
> 
> The fifo_virt is used to store the hardware descriptors that are used directly
> by the dma controller.  I have to at least fill in the descriptor FIFO address
> and size and reset the channel to clear the fifo offset inside the hardware.
> After reset the internal fifo offset is 0.  And every subsequent transaction
> increments this.  That is how it knows which descriptors to work on inside the
> descriptor fifo memory.
> 
> I can definitely defer the rest of hte h/w interactions until the point that I
> need to actually kick off the dma controller.
> 
> 
> > > +	pctrl.value = 0;
> > > +	pctrl.bits.p_direction =
> > > +		(bchan->bam_slave.slave.direction == DMA_DEV_TO_MEM) ?
> > > +			BAM_PIPE_PRODUCER : BAM_PIPE_CONSUMER;
> > > +	pctrl.bits.p_sys_mode = BAM_PIPE_MODE_SYSTEM;
> > > +	pctrl.bits.p_en = 1;
> > > +
> > > +	iowrite32(pctrl.value, bdev->regs + BAM_P_CTRL(bchan->id));
> > > +
> > > +	/* set desc threshold */
> > > +	/* do bookkeeping for tracking used EEs, used during IRQ handling */
> > > +	set_bit(bchan->ee, &bdev->enabled_ees);
> > > +
> > > +	bchan->head = 0;
> > > +	bchan->tail = 0;
> > > +
> > > +	return 0;
> > You said you are going to allocate descriptors so right thing would be return
> > number of allocated desc here!
> 
> OK, I missed that.
> 
> > > +}
> > > +
> > > +/*
> > > + * bam_free_chan - Frees dma resources associated with specific channel
> > > + * @chan: specified channel
> > > + *
> > > + * Free the allocated fifo descriptor memory and channel resources
> > > + *
> > > + */
> > > +static void bam_free_chan(struct dma_chan *chan)
> > > +{
> > > +	struct bam_chan *bchan = to_bam_chan(chan);
> > > +	struct bam_device *bdev = bchan->device;
> > > +	u32 val;
> > > +
> > Shouldn't you check if channel is busy?
> > 
> 
> Yes, I'll add that in.  With no return code, how useful is this to the caller?
> 
> 
> > > +	/* reset channel */
> > > +	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
> > > +	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
> > > +
> > > +	dma_free_coherent(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
> > > +				bchan->fifo_phys);
> > > +
> > > +	/* mask irq for pipe/channel */
> > > +	val = ioread32(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> > > +	val &= ~(1 << bchan->id);
> > > +	iowrite32(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> > > +
> > > +	/* disable irq */
> > > +	iowrite32(0, bdev->regs + BAM_P_IRQ_EN(bchan->id));
> > > +
> > > +	clear_bit(bchan->ee, &bdev->enabled_ees);
> > > +}
> > > +
> > > +/*
> > > + * bam_slave_config - set slave configuration for channel
> > > + * @chan: dma channel
> > > + * @cfg: slave configuration
> > > + *
> > > + * Sets slave configuration for channel
> > > + * Only allow setting direction once.  BAM channels are unidirectional
> > > + * and the direction is set in hardware.
> > > + *
> > > + */
> > > +static void bam_slave_config(struct bam_chan *bchan,
> > > +		struct bam_dma_slave_config *bcfg)
> > 
> > > +{
> > > +	struct bam_device *bdev = bchan->device;
> > > +
> > > +	bchan->bam_slave.desc_threshold = bcfg->desc_threshold;
> > what does the desc_threshold mean?
>  
> The desc threshhold determines the minimum number of bytes of descriptor that
> causes a write event to be communicated to the peripheral.  I default to 4 bytes
> (1 descriptor), but this is configurable through the DMA_SLAVE_CONFIG interface
> using the extended slave_config structure.
> 
> > > +
> > > +	/* set desc threshold */
> > > +	iowrite32(bcfg->desc_threshold, bdev->regs + BAM_DESC_CNT_TRSHLD);
> > > +}
> > > +
> > > +/*
> > > + * bam_start_dma - loads up descriptors and starts dma
> > > + * @chan: dma channel
> > > + *
> > > + * Loads descriptors into descriptor fifo and starts dma controller
> > > + *
> > > + * NOTE: Must hold channel lock
> > > +*/
> > > +static void bam_start_dma(struct bam_chan *bchan)
> > > +{
> > > +	struct bam_device *bdev = bchan->device;
> > > +	struct bam_async_desc *async_desc, *_adesc;
> > > +	u32 curr_len, val;
> > > +	u32 num_processed = 0;
> > > +
> > > +	if (list_empty(&bchan->pending))
> > > +		return;
> > > +
> > > +	curr_len = (bchan->head <= bchan->tail) ?
> > > +			bchan->tail - bchan->head :
> > > +			MAX_DESCRIPTORS - bchan->head + bchan->tail;
> > > +
> > > +	list_for_each_entry_safe(async_desc, _adesc, &bchan->pending, node) {
> > > +
> > > +		/* bust out if we are out of room */
> > > +		if (async_desc->num_desc + curr_len > MAX_DESCRIPTORS)
> > > +			break;
> > > +
> > > +		/* copy descriptors into fifo */
> > > +		if (bchan->tail + async_desc->num_desc > MAX_DESCRIPTORS) {
> > > +			u32 partial = MAX_DESCRIPTORS - bchan->tail;
> > > +
> > > +			memcpy(&bchan->fifo_virt[bchan->tail], async_desc->desc,
> > > +				partial * sizeof(struct bam_desc_hw));
> > > +			memcpy(bchan->fifo_virt, &async_desc->desc[partial],
> > > +				(async_desc->num_desc - partial) *
> > > +					sizeof(struct bam_desc_hw));
> > memcpy for device memory copy?
> 
> My initial thought was that I needed to wait until now to fill in the
> descriptors on the fifo that was allocated.  The fifo memory is accessed from
> the dma controller.  The controller just manages an internal offset that rolls
> over based on the size of the configured fifo memory.  I was keeping the
> descriptors on hand until I needed to program them into the fifo memory, hence
> the copy.
> 
> > > +		} else {
> > > +			memcpy(&bchan->fifo_virt[bchan->tail], async_desc->desc,
> > > +				async_desc->num_desc *
> > > +				sizeof(struct bam_desc_hw));
> > > +		}
> > > +
> > > +		num_processed++;
> > > +		bchan->tail += async_desc->num_desc;
> > > +		bchan->tail %= MAX_DESCRIPTORS;
> > > +		curr_len += async_desc->num_desc;
> > > +
> > > +		list_move_tail(&async_desc->node, &bchan->active);
> > > +	}
> > > +
> > > +	/* bail if we didn't queue anything to the active queue */
> > > +	if (!num_processed)
> > > +		return;
> > > +
> > > +	async_desc = list_first_entry(&bchan->active, struct bam_async_desc,
> > > +			node);
> > > +
> > > +	val = ioread32(bdev->regs + BAM_P_SW_OFSTS(bchan->id));
> > > +	val &= P_SW_OFSTS_MASK;
> > > +
> > > +	/* kick off dma by forcing a write event to the pipe */
> > > +	iowrite32((bchan->tail * sizeof(struct bam_desc_hw)),
> > > +			bdev->regs + BAM_P_EVNT_REG(bchan->id));
> > > +}
> > > +
> > > +/*
> > > + * bam_tx_submit - Adds transaction to channel pending queue
> > > + * @tx: transaction to queue
> > > + *
> > > + * Adds dma transaction to pending queue for channel
> > > + *
> > > +*/
> > > +static dma_cookie_t bam_tx_submit(struct dma_async_tx_descriptor *tx)
> > > +{
> > > +	struct bam_chan *bchan = to_bam_chan(tx->chan);
> > > +	struct bam_async_desc *desc = to_bam_async_desc(tx);
> > > +	dma_cookie_t cookie;
> > > +
> > > +	spin_lock_bh(&bchan->lock);
> > > +
> > > +	cookie = dma_cookie_assign(tx);
> > > +	list_add_tail(&desc->node, &bchan->pending);
> > > +
> > > +	spin_unlock_bh(&bchan->lock);
> > > +
> > > +	return cookie;
> > > +}
> > Okay you are *NOT* using virt-dma layer, all this can be removed if you do that,
> > this is similar to what vchan_tx_submit() does!
> >
> 
> I'll look into reworking and utilizing the virt-dma layer.
>  

Is it acceptable to pick and choose the pieces of the virt-dma layer that
benefit me? Or do I have to absorb all of it.  The smaller helper structs and
functions are fine, but in some cases they force a certain implementation.

The bam_tx_submit and perhaps the cleanup functions are about the only pieces
I'd be able to leverage from the virt-dma, aside from the structures.

The main issue with the rest of the code is that it doesn't fit the behavior of
my dma controller.  Because i have a fixed sized circular buffer for my
descriptor FIFO, I have to copy in the new descriptors before I start up the
dma channel.

Using the vchan_cookie_complete() forces me to start the next transaction within
the interrupt, or schedule another tasklet to do that work for me.  The overhead
for copying what could be a large number of descriptors into the FIFO might
introduce too much latency inside the IRQ handler  (especially since this is done
to non-cached memory).  Using another tasklet for just restarting the dma
controller seems klunky to me.

I would rather start the next transaction within the tasklet queued from the IRQ
(vchan_cookie_complete), but because it uses it's own tasklet, I wouldn't be
able to leverage that.

The vchan_cookie_complete() usage within the IRQ handler also implys that only 1
dma transaction is completed per IRQ.  That might not be the case for me,
because I can advance the DMA FIFO offset to tell the controller to keep going
to the next transaction.  By the time I get to servicing the IRQ, I might have
completed more than 1 transaction.   I suppose you could call
vchan_cookie_complete() multiple times, but that seems wrong to me due to the
multiple calls to tasklet_schedule.


> > > +
> > > +/*
> > > + * bam_prep_slave_sg - Prep slave sg transaction
> > > + *
> > > + * @chan: dma channel
> > > + * @sgl: scatter gather list
> > > + * @sg_len: length of sg
> > > + * @direction: DMA transfer direction
> > > + * @flags: DMA flags
> > > + * @context: transfer context (unused)
> > > + */
> > > +static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan,
> > > +	struct scatterlist *sgl, unsigned int sg_len,
> > > +	enum dma_transfer_direction direction, unsigned long flags,
> > > +	void *context)
> > > +{
> > > +	struct bam_chan *bchan = to_bam_chan(chan);
> > > +	struct bam_device *bdev = bchan->device;
> > > +	struct bam_async_desc *async_desc = NULL;
> > > +	struct scatterlist *sg;
> > > +	u32 i;
> > > +	struct bam_desc_hw *desc;
> > > +
> > > +
> > > +	if (!is_slave_direction(direction)) {
> > > +		dev_err(bdev->dev, "invalid dma direction\n");
> > > +		goto err_out;
> > > +	}
> > > +
> > > +	/* direction has to match pipe configuration from the slave config */
> > > +	if (direction != bchan->bam_slave.slave.direction) {
> > > +		dev_err(bdev->dev,
> > > +				"trans does not match channel configuration\n");
> > > +		goto err_out;
> > > +	}
> > > +
> > > +	/* make sure number of descriptors will fit within the fifo */
> > > +	if (sg_len > MAX_DESCRIPTORS) {
> > > +		dev_err(bdev->dev, "not enough fifo descriptor space\n");
> > > +		goto err_out;
> > > +	}
> > what prevents you from assigning more virtual descriptors to this case and then
> > submit those after HW descriptors are done.
> 
> I hadn't considered the virtual descriptors.  I'll see what I can do to utilize
> that and rework this.  I can see the virtual descriptors simplifying this a good
> bit.
> 
> > > +
> > > +	/* allocate enough room to accomodate the number of entries */
> > > +	async_desc = kzalloc(sizeof(*async_desc) +
> > > +			(sg_len * sizeof(struct bam_desc_hw)), GFP_KERNEL);
> > this cna be called from non sleepy context and the recommedation is to use
> > GFP_NOWAIT for memory allocation
> > 
> 
> I missed this.  I'll change it.
> 
> > > +static int bam_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
> > > +	unsigned long arg)
> > > +{
> > > +	struct bam_chan *bchan = to_bam_chan(chan);
> > > +	struct bam_device *bdev = bchan->device;
> > > +	struct bam_dma_slave_config *bconfig;
> > > +	int ret = 0;
> > > +
> > > +	switch (cmd) {
> > > +	case DMA_PAUSE:
> > > +		spin_lock_bh(&bchan->lock);
> > > +		iowrite32(1, bdev->regs + BAM_P_HALT(bchan->id));
> > > +		spin_unlock_bh(&bchan->lock);
> > > +		break;
> > > +	case DMA_RESUME:
> > > +		spin_lock_bh(&bchan->lock);
> > > +		iowrite32(0, bdev->regs + BAM_P_HALT(bchan->id));
> > > +		spin_unlock_bh(&bchan->lock);
> > > +		break;
> > > +	case DMA_TERMINATE_ALL:
> > > +		bam_dma_terminate_all(chan);
> > > +		break;
> > > +	case DMA_SLAVE_CONFIG:
> > > +		bconfig = (struct bam_dma_slave_config *)arg;
> > > +		bam_slave_config(bchan, bconfig);
> > DMA_SLAVE_CONFIG expects arg as struct dma_slave_config, not this. Pls dont
> > voilate APIs
> 
> So for extended slave_config structures, the caller needs to send in a ptr to
> the dma_slave_config and then I can determine the bam_dma_slave_config.  Will
> fix.
> 
> > > +		break;
> > > +	default:
> > > +		ret = -EIO;
> > I would expect -ENXIO here!
> > 
> 
> OK.
> 
> > > +		break;
> > > +	}
> > > +
> > > +	return ret;
> > > +}
> > > +
> > > +/*
> > > + * bam_tx_status - returns status of transaction
> > > + * @chan: dma channel
> > > + * @cookie: transaction cookie
> > > + * @txstate: DMA transaction state
> > > + *
> > > + * Return status of dma transaction
> > > + */
> > > +static enum dma_status bam_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
> > > +		struct dma_tx_state *txstate)
> > > +{
> > > +	return dma_cookie_status(chan, cookie, txstate);
> > hmmm, this wont work in many cases for slave. For example if you try to get this
> > working with audio you need to provide the residue values.
> > The results you are providing here will not be useful, so I would recommedn
> > fixing it
> > 
> 
> Ok so in this case I'd need to read where I am in the descriptor chain and
> calculate the residual.  That shouldn't be a problem.
> 
> > > +
> > > +static int bam_dma_probe(struct platform_device *pdev)
> > > +{
> > > +	struct bam_device *bdev;
> > > +	int err, i;
> > > +
> > > +	bdev = kzalloc(sizeof(*bdev), GFP_KERNEL);
> > devm_ pls
> >
> 
> will fix.
> 
> > > +	if (!bdev) {
> > > +		dev_err(&pdev->dev, "insufficient memory for private data\n");
> > > +		err = -ENOMEM;
> > > +		goto err_no_bdev;
> > > +	}
> > > +
> > > +	bdev->dev = &pdev->dev;
> > > +	dev_set_drvdata(bdev->dev, bdev);
> > > +
> > > +	bdev->regs = of_iomap(pdev->dev.of_node, 0);
> > > +	if (!bdev->regs) {
> > > +		dev_err(bdev->dev, "unable to ioremap base\n");
> > > +		err = -ENOMEM;
> > > +		goto err_free_bamdev;
> > > +	}
> > > +
> > > +	bdev->irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
> > > +	if (bdev->irq == NO_IRQ) {
> > > +		dev_err(bdev->dev, "unable to map irq\n");
> > > +		err = -EINVAL;
> > > +		goto err_unmap_mem;
> > > +	}
> > > +
> > > +	bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk");
> > > +	if (IS_ERR(bdev->bamclk)) {
> > > +		err = PTR_ERR(bdev->bamclk);
> > > +		goto err_free_irq;
> > > +	}
> > > +
> > > +	err = clk_prepare_enable(bdev->bamclk);
> > > +	if (err) {
> > > +		dev_err(bdev->dev, "failed to prepare/enable clock");
> > > +		goto err_free_irq;
> > > +	}
> > > +
> > > +	err = request_irq(bdev->irq, &bam_dma_irq, IRQF_TRIGGER_HIGH, "bam_dma",
> > > +				bdev);
> > > +	if (err) {
> > > +		dev_err(bdev->dev, "error requesting irq\n");
> > > +		err = -EINVAL;
> > > +		goto err_disable_clk;
> > > +	}
> > > +
> > > +	if (bam_init(bdev)) {
> > > +		dev_err(bdev->dev, "cannot initialize bam device\n");
> > > +		err = -EINVAL;
> > > +		goto err_disable_clk;
> > > +	}
> > > +
> > > +	bdev->channels = kzalloc(sizeof(*bdev->channels) * bdev->num_channels,
> > > +				GFP_KERNEL);
> > > +
> > > +	if (!bdev->channels) {
> > > +		dev_err(bdev->dev, "unable to allocate channels\n");
> > > +		err = -ENOMEM;
> > > +		goto err_disable_clk;
> > > +	}
> > > +
> > > +	/* allocate and initialize channels */
> > > +	INIT_LIST_HEAD(&bdev->common.channels);
> > > +
> > > +	for (i = 0; i < bdev->num_channels; i++)
> > > +		bam_channel_init(bdev, &bdev->channels[i], i);
> > > +
> > > +	/* set max dma segment size */
> > > +	bdev->common.dev = bdev->dev;
> > > +	bdev->common.dev->dma_parms = &bdev->dma_parms;
> > > +	if (dma_set_max_seg_size(bdev->common.dev, BAM_MAX_DATA_SIZE)) {
> > > +		dev_err(bdev->dev, "cannot set maximum segment size\n");
> > > +		goto err_disable_clk;
> > > +	}
> > > +
> > > +	/* set capabilities */
> > > +	dma_cap_zero(bdev->common.cap_mask);
> > > +	dma_cap_set(DMA_SLAVE, bdev->common.cap_mask);
> > > +
> > > +	/* initialize dmaengine apis */
> > > +	bdev->common.device_alloc_chan_resources = bam_alloc_chan;
> > > +	bdev->common.device_free_chan_resources = bam_free_chan;
> > > +	bdev->common.device_prep_slave_sg = bam_prep_slave_sg;
> > > +	bdev->common.device_control = bam_control;
> > > +	bdev->common.device_issue_pending = bam_issue_pending;
> > > +	bdev->common.device_tx_status = bam_tx_status;
> > > +	bdev->common.dev = bdev->dev;
> > > +
> > > +	dma_async_device_register(&bdev->common);
> > > +
> > > +	if (pdev->dev.of_node) {
> > > +		err = of_dma_controller_register(pdev->dev.of_node,
> > > +				bam_dma_xlate, &bdev->common);
> > > +
> > > +		if (err) {
> > > +			dev_err(bdev->dev, "failed to register of_dma\n");
> > > +			goto err_unregister_dma;
> > > +		}
> > > +	}
> > > +
> > > +	return 0;
> > > +
> > > +err_unregister_dma:
> > > +	dma_async_device_unregister(&bdev->common);
> > > +err_free_irq:
> > > +	free_irq(bdev->irq, bdev);
> > > +err_disable_clk:
> > > +	clk_disable_unprepare(bdev->bamclk);
> > > +err_unmap_mem:
> > > +	iounmap(bdev->regs);
> > > +err_free_bamdev:
> > > +	if (bdev)
> > > +		kfree(bdev->channels);
> > > +	kfree(bdev);
> > > +err_no_bdev:
> > you need to get yourslef introduced with devm_ friends to ease this part!
> > 
> > Overall I think driver needs to bit more plumbing and also needs to use the
> > virt-dma so that bunch of work already done is not redefined here.
> 
> I'll rework for comments and see how I can incorporate the virt-dma.
> 
> -- 
> sent by an employee of the Qualcomm Innovation Center, Inc.
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> hosted by The Linux Foundation
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Vinod Koul Nov. 13, 2013, 1:18 p.m. UTC | #7
On Thu, Nov 07, 2013 at 05:03:17PM -0600, Andy Gross wrote:
> On Thu, Oct 31, 2013 at 04:46:21PM -0500, Andy Gross wrote:
> > On Thu, Oct 31, 2013 at 10:29:48PM +0530, Vinod Koul wrote:
> > > On Fri, Oct 25, 2013 at 03:24:02PM -0500, Andy Gross wrote:
> > > 
> > > This should be sent to dmaengine@vger.kernel.org
> >  
> > I'll add the list when I send the second iteration or should I send it over mid
> > stream?
either ways is okay, but pls make sure the next rev is sent on list.

> > 
> > > > +	/* reset channel */
> > > > +	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
> > > > +	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
> > > > +
> > > > +	dma_free_coherent(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
> > > > +				bchan->fifo_phys);
> > > > +
> > > > +	/* mask irq for pipe/channel */
> > > > +	val = ioread32(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> > > > +	val &= ~(1 << bchan->id);
> > > > +	iowrite32(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> > > > +
> > > > +	/* disable irq */
> > > > +	iowrite32(0, bdev->regs + BAM_P_IRQ_EN(bchan->id));
> > > > +
> > > > +	clear_bit(bchan->ee, &bdev->enabled_ees);
> > > > +}
> > > > +
> > > > +/*
> > > > + * bam_slave_config - set slave configuration for channel
> > > > + * @chan: dma channel
> > > > + * @cfg: slave configuration
> > > > + *
> > > > + * Sets slave configuration for channel
> > > > + * Only allow setting direction once.  BAM channels are unidirectional
> > > > + * and the direction is set in hardware.
> > > > + *
> > > > + */
> > > > +static void bam_slave_config(struct bam_chan *bchan,
> > > > +		struct bam_dma_slave_config *bcfg)
> > > 
> > > > +{
> > > > +	struct bam_device *bdev = bchan->device;
> > > > +
> > > > +	bchan->bam_slave.desc_threshold = bcfg->desc_threshold;
> > > what does the desc_threshold mean?
> >  
> > The desc threshhold determines the minimum number of bytes of descriptor that
> > causes a write event to be communicated to the peripheral.  I default to 4 bytes
> > (1 descriptor), but this is configurable through the DMA_SLAVE_CONFIG interface
> > using the extended slave_config structure.
sounds like src/dst_maxburst?

> > > > + * bam_tx_submit - Adds transaction to channel pending queue
> > > > + * @tx: transaction to queue
> > > > + *
> > > > + * Adds dma transaction to pending queue for channel
> > > > + *
> > > > +*/
> > > > +static dma_cookie_t bam_tx_submit(struct dma_async_tx_descriptor *tx)
> > > > +{
> > > > +	struct bam_chan *bchan = to_bam_chan(tx->chan);
> > > > +	struct bam_async_desc *desc = to_bam_async_desc(tx);
> > > > +	dma_cookie_t cookie;
> > > > +
> > > > +	spin_lock_bh(&bchan->lock);
> > > > +
> > > > +	cookie = dma_cookie_assign(tx);
> > > > +	list_add_tail(&desc->node, &bchan->pending);
> > > > +
> > > > +	spin_unlock_bh(&bchan->lock);
> > > > +
> > > > +	return cookie;
> > > > +}
> > > Okay you are *NOT* using virt-dma layer, all this can be removed if you do that,
> > > this is similar to what vchan_tx_submit() does!
> > >
> > 
> > I'll look into reworking and utilizing the virt-dma layer.
> >  
> 
> Is it acceptable to pick and choose the pieces of the virt-dma layer that
> benefit me? Or do I have to absorb all of it.  The smaller helper structs and
> functions are fine, but in some cases they force a certain implementation.
and that implementation is the right one and the what we expect from users!

> The bam_tx_submit and perhaps the cleanup functions are about the only pieces
> I'd be able to leverage from the virt-dma, aside from the structures.
> 
> The main issue with the rest of the code is that it doesn't fit the behavior of
> my dma controller.  Because i have a fixed sized circular buffer for my
> descriptor FIFO, I have to copy in the new descriptors before I start up the
> dma channel.
the virt-dma is for managing the descriptors and the lists for managing the
descriptors. Using this is right way and is based on dmaengine APIs and not on
dma controllers, so I see no reason why you cant use this!

> Using the vchan_cookie_complete() forces me to start the next transaction within
> the interrupt, or schedule another tasklet to do that work for me.  The overhead
> for copying what could be a large number of descriptors into the FIFO might
> introduce too much latency inside the IRQ handler  (especially since this is done
> to non-cached memory).  Using another tasklet for just restarting the dma
> controller seems klunky to me.
That is the expectation from API. Once a txn is complete, you need to quickly
start the next one in the completion.

> I would rather start the next transaction within the tasklet queued from the IRQ
> (vchan_cookie_complete), but because it uses it's own tasklet, I wouldn't be
> able to leverage that.
why dont you call the vchan_cookie_complete from the irq. That will trigger the
virt-tasklet to complete the current one, then you schedule your tasklet to
program the next one.

> The vchan_cookie_complete() usage within the IRQ handler also implys that only 1
> dma transaction is completed per IRQ.  That might not be the case for me,
> because I can advance the DMA FIFO offset to tell the controller to keep going
> to the next transaction.  By the time I get to servicing the IRQ, I might have
> completed more than 1 transaction.   I suppose you could call
> vchan_cookie_complete() multiple times, but that seems wrong to me due to the
> multiple calls to tasklet_schedule.
I think you are mistaken here. If you have issued 3 descriptors to your HW, then
assuming you got single completion (which IMO is wrong and you should get
interrupt for every completion), then you mark all three as completed, the
completion would move all the completed descriptors

> > > > +static int bam_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
> > > > +	unsigned long arg)
> > > > +{
> > > > +	struct bam_chan *bchan = to_bam_chan(chan);
> > > > +	struct bam_device *bdev = bchan->device;
> > > > +	struct bam_dma_slave_config *bconfig;
> > > > +	int ret = 0;
> > > > +
> > > > +	switch (cmd) {
> > > > +	case DMA_PAUSE:
> > > > +		spin_lock_bh(&bchan->lock);
> > > > +		iowrite32(1, bdev->regs + BAM_P_HALT(bchan->id));
> > > > +		spin_unlock_bh(&bchan->lock);
> > > > +		break;
> > > > +	case DMA_RESUME:
> > > > +		spin_lock_bh(&bchan->lock);
> > > > +		iowrite32(0, bdev->regs + BAM_P_HALT(bchan->id));
> > > > +		spin_unlock_bh(&bchan->lock);
> > > > +		break;
> > > > +	case DMA_TERMINATE_ALL:
> > > > +		bam_dma_terminate_all(chan);
> > > > +		break;
> > > > +	case DMA_SLAVE_CONFIG:
> > > > +		bconfig = (struct bam_dma_slave_config *)arg;
> > > > +		bam_slave_config(bchan, bconfig);
> > > DMA_SLAVE_CONFIG expects arg as struct dma_slave_config, not this. Pls dont
> > > voilate APIs
> > 
> > So for extended slave_config structures, the caller needs to send in a ptr to
> > the dma_slave_config and then I can determine the bam_dma_slave_config.  Will
> > fix.
What are the additional parameters you need to "extended" config?

> > 
> > > > +		break;
> > > > +	default:
> > > > +		ret = -EIO;
> > > I would expect -ENXIO here!
> > > 
> > 
> > OK.
> > 
> > > > +		break;
> > > > +	}
> > > > +
> > > > +	return ret;
> > > > +}
> > > > +
> > > > +/*
> > > > + * bam_tx_status - returns status of transaction
> > > > + * @chan: dma channel
> > > > + * @cookie: transaction cookie
> > > > + * @txstate: DMA transaction state
> > > > + *
> > > > + * Return status of dma transaction
> > > > + */
> > > > +static enum dma_status bam_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
> > > > +		struct dma_tx_state *txstate)
> > > > +{
> > > > +	return dma_cookie_status(chan, cookie, txstate);
> > > hmmm, this wont work in many cases for slave. For example if you try to get this
> > > working with audio you need to provide the residue values.
> > > The results you are providing here will not be useful, so I would recommedn
> > > fixing it
> > > 
> > 
> > Ok so in this case I'd need to read where I am in the descriptor chain and
> > calculate the residual.  That shouldn't be a problem.
Yup!
Andy Gross Nov. 13, 2013, 9:03 p.m. UTC | #8
On Wed, Nov 13, 2013 at 06:48:12PM +0530, Vinod Koul wrote:
> On Thu, Nov 07, 2013 at 05:03:17PM -0600, Andy Gross wrote:
> > On Thu, Oct 31, 2013 at 04:46:21PM -0500, Andy Gross wrote:
> > > On Thu, Oct 31, 2013 at 10:29:48PM +0530, Vinod Koul wrote:
> > > > On Fri, Oct 25, 2013 at 03:24:02PM -0500, Andy Gross wrote:
> > > > 
> > > > This should be sent to dmaengine@vger.kernel.org
> > >  
> > > I'll add the list when I send the second iteration or should I send it over mid
> > > stream?
> either ways is okay, but pls make sure the next rev is sent on list.
> 
> > > 
> > > > > +	/* reset channel */
> > > > > +	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
> > > > > +	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
> > > > > +
> > > > > +	dma_free_coherent(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
> > > > > +				bchan->fifo_phys);
> > > > > +
> > > > > +	/* mask irq for pipe/channel */
> > > > > +	val = ioread32(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> > > > > +	val &= ~(1 << bchan->id);
> > > > > +	iowrite32(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
> > > > > +
> > > > > +	/* disable irq */
> > > > > +	iowrite32(0, bdev->regs + BAM_P_IRQ_EN(bchan->id));
> > > > > +
> > > > > +	clear_bit(bchan->ee, &bdev->enabled_ees);
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > + * bam_slave_config - set slave configuration for channel
> > > > > + * @chan: dma channel
> > > > > + * @cfg: slave configuration
> > > > > + *
> > > > > + * Sets slave configuration for channel
> > > > > + * Only allow setting direction once.  BAM channels are unidirectional
> > > > > + * and the direction is set in hardware.
> > > > > + *
> > > > > + */
> > > > > +static void bam_slave_config(struct bam_chan *bchan,
> > > > > +		struct bam_dma_slave_config *bcfg)
> > > > 
> > > > > +{
> > > > > +	struct bam_device *bdev = bchan->device;
> > > > > +
> > > > > +	bchan->bam_slave.desc_threshold = bcfg->desc_threshold;
> > > > what does the desc_threshold mean?
> > >  
> > > The desc threshhold determines the minimum number of bytes of descriptor that
> > > causes a write event to be communicated to the peripheral.  I default to 4 bytes
> > > (1 descriptor), but this is configurable through the DMA_SLAVE_CONFIG interface
> > > using the extended slave_config structure.
> sounds like src/dst_maxburst?
 
Ok, i wasn't sure if that really matched.  That simplifies this and gets me out
of dealing with a extended slave_config structure.  At least until I need to add
support for some of the other versions of our BAM DMA controller.

> > > > > + * bam_tx_submit - Adds transaction to channel pending queue
> > > > > + * @tx: transaction to queue
> > > > > + *
> > > > > + * Adds dma transaction to pending queue for channel
> > > > > + *
> > > > > +*/
> > > > > +static dma_cookie_t bam_tx_submit(struct dma_async_tx_descriptor *tx)
> > > > > +{
> > > > > +	struct bam_chan *bchan = to_bam_chan(tx->chan);
> > > > > +	struct bam_async_desc *desc = to_bam_async_desc(tx);
> > > > > +	dma_cookie_t cookie;
> > > > > +
> > > > > +	spin_lock_bh(&bchan->lock);
> > > > > +
> > > > > +	cookie = dma_cookie_assign(tx);
> > > > > +	list_add_tail(&desc->node, &bchan->pending);
> > > > > +
> > > > > +	spin_unlock_bh(&bchan->lock);
> > > > > +
> > > > > +	return cookie;
> > > > > +}
> > > > Okay you are *NOT* using virt-dma layer, all this can be removed if you do that,
> > > > this is similar to what vchan_tx_submit() does!
> > > >
> > > 
> > > I'll look into reworking and utilizing the virt-dma layer.
> > >  
> > 
> > Is it acceptable to pick and choose the pieces of the virt-dma layer that
> > benefit me? Or do I have to absorb all of it.  The smaller helper structs and
> > functions are fine, but in some cases they force a certain implementation.
> and that implementation is the right one and the what we expect from users!
> 
> > The bam_tx_submit and perhaps the cleanup functions are about the only pieces
> > I'd be able to leverage from the virt-dma, aside from the structures.
> > 
> > The main issue with the rest of the code is that it doesn't fit the behavior of
> > my dma controller.  Because i have a fixed sized circular buffer for my
> > descriptor FIFO, I have to copy in the new descriptors before I start up the
> > dma channel.
> the virt-dma is for managing the descriptors and the lists for managing the
> descriptors. Using this is right way and is based on dmaengine APIs and not on
> dma controllers, so I see no reason why you cant use this!
> 

Let me expand on how our hardware works:

The BAM controller requires the use of external memory for storage of the
hardware descriptors for each channel.  The location and size of the FIFO is
programmed into a set of registers.  The controller keeps track of the current
offset to be worked on within that FIFO.  Kicking off a transaction is as simple
as updating one register to indicate the offset of the next free descriptor.

So in the alloc_channel(), I allocate the FIFO.  However, I never consume any
FIFO space until the tx_submit() is called.  The prep_slave_sg() cannot consume
FIFO which means I have to keep around a copy of the sg list (or descriptors
that correspond to each entry).

> > Using the vchan_cookie_complete() forces me to start the next transaction within
> > the interrupt, or schedule another tasklet to do that work for me.  The overhead
> > for copying what could be a large number of descriptors into the FIFO might
> > introduce too much latency inside the IRQ handler  (especially since this is done
> > to non-cached memory).  Using another tasklet for just restarting the dma
> > controller seems klunky to me.
> That is the expectation from API. Once a txn is complete, you need to quickly
> start the next one in the completion.
>
> > I would rather start the next transaction within the tasklet queued from the IRQ
> > (vchan_cookie_complete), but because it uses it's own tasklet, I wouldn't be
> > able to leverage that.
> why dont you call the vchan_cookie_complete from the irq. That will trigger the
> virt-tasklet to complete the current one, then you schedule your tasklet to
> program the next one.
> 
This equates to serialization of DMA transactions on a channel.  We can't allow
the controller to immediately start on the next transaction until we've fully
processed the current one.  This introduces latency between each transaction for
no real reason.

> > The vchan_cookie_complete() usage within the IRQ handler also implys that only 1
> > dma transaction is completed per IRQ.  That might not be the case for me,
> > because I can advance the DMA FIFO offset to tell the controller to keep going
> > to the next transaction.  By the time I get to servicing the IRQ, I might have
> > completed more than 1 transaction.   I suppose you could call
> > vchan_cookie_complete() multiple times, but that seems wrong to me due to the
> > multiple calls to tasklet_schedule.
> I think you are mistaken here. If you have issued 3 descriptors to your HW, then
> assuming you got single completion (which IMO is wrong and you should get
> interrupt for every completion), then you mark all three as completed, the
> completion would move all the completed descriptors
>
Ok, lets say you have 1 large DMA transaction followed by 2 really small ones.
If I was to go ahead and advance my FIFO offset to indicate the 2 new
transactions (after an issue_pending) while the first transaction is still being worked on, I could
conceivably have all three complete by the time the ISR is serviced.

In the virt-dma model, I wouldn't advance my FIFO offset until the currently
running transaction completes.  The latency of kicking off that next transaction
means my channel utilization goes down because my controller is idle until I get
around to executing the tasklet to kick off the next one.
 
I think I'll just implement this both ways and gather some metrics on what it
costs me to use the virt-dma.

> > > > > +static int bam_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
> > > > > +	unsigned long arg)
> > > > > +{
> > > > > +	struct bam_chan *bchan = to_bam_chan(chan);
> > > > > +	struct bam_device *bdev = bchan->device;
> > > > > +	struct bam_dma_slave_config *bconfig;
> > > > > +	int ret = 0;
> > > > > +
> > > > > +	switch (cmd) {
> > > > > +	case DMA_PAUSE:
> > > > > +		spin_lock_bh(&bchan->lock);
> > > > > +		iowrite32(1, bdev->regs + BAM_P_HALT(bchan->id));
> > > > > +		spin_unlock_bh(&bchan->lock);
> > > > > +		break;
> > > > > +	case DMA_RESUME:
> > > > > +		spin_lock_bh(&bchan->lock);
> > > > > +		iowrite32(0, bdev->regs + BAM_P_HALT(bchan->id));
> > > > > +		spin_unlock_bh(&bchan->lock);
> > > > > +		break;
> > > > > +	case DMA_TERMINATE_ALL:
> > > > > +		bam_dma_terminate_all(chan);
> > > > > +		break;
> > > > > +	case DMA_SLAVE_CONFIG:
> > > > > +		bconfig = (struct bam_dma_slave_config *)arg;
> > > > > +		bam_slave_config(bchan, bconfig);
> > > > DMA_SLAVE_CONFIG expects arg as struct dma_slave_config, not this. Pls dont
> > > > voilate APIs
> > > 
> > > So for extended slave_config structures, the caller needs to send in a ptr to
> > > the dma_slave_config and then I can determine the bam_dma_slave_config.  Will
> > > fix.
> What are the additional parameters you need to "extended" config?
> 
> > > 
> > > > > +		break;
> > > > > +	default:
> > > > > +		ret = -EIO;
> > > > I would expect -ENXIO here!
> > > > 
> > > 
> > > OK.
> > > 
> > > > > +		break;
> > > > > +	}
> > > > > +
> > > > > +	return ret;
> > > > > +}
> > > > > +
> > > > > +/*
> > > > > + * bam_tx_status - returns status of transaction
> > > > > + * @chan: dma channel
> > > > > + * @cookie: transaction cookie
> > > > > + * @txstate: DMA transaction state
> > > > > + *
> > > > > + * Return status of dma transaction
> > > > > + */
> > > > > +static enum dma_status bam_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
> > > > > +		struct dma_tx_state *txstate)
> > > > > +{
> > > > > +	return dma_cookie_status(chan, cookie, txstate);
> > > > hmmm, this wont work in many cases for slave. For example if you try to get this
> > > > working with audio you need to provide the residue values.
> > > > The results you are providing here will not be useful, so I would recommedn
> > > > fixing it
> > > > 
> > > 
> > > Ok so in this case I'd need to read where I am in the descriptor chain and
> > > calculate the residual.  That shouldn't be a problem.
> Yup!
Tomasz Figa Nov. 13, 2013, 10:07 p.m. UTC | #9
Hi Andy,

On Wednesday 30 of October 2013 15:31:05 Andy Gross wrote:
> On Tue, Oct 29, 2013 at 10:56:03AM -0700, Stephen Boyd wrote:
> > On 10/25, Andy Gross wrote:
> > > diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
> > > index f238cfd..a71b415 100644
> > > --- a/drivers/dma/Kconfig
> > > +++ b/drivers/dma/Kconfig
> > > @@ -364,4 +364,13 @@ config DMATEST
> > >  	  Simple DMA test client. Say N unless you're debugging a
> > >  	  DMA Device driver.
> > >  
> > > +
> > > +config MSM_BAM_DMA
> > > +	tristate "MSM BAM DMA support"
> > > +	depends on ARCH_MSM
> > 
> > It would be nice if we didn't have to rely on ARCH_MSM here so we
> > get more build coverage.
>  
> I can remove that.  There is nothing that forces this depend option.

Instead of removing, please add "|| COMPILE_TEST". This is the right way
to allow drivers not supposed to be run on given platform to build on it.

Best regards,
Tomasz
diff mbox

Patch

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index f238cfd..a71b415 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -364,4 +364,13 @@  config DMATEST
 	  Simple DMA test client. Say N unless you're debugging a
 	  DMA Device driver.
 
+
+config MSM_BAM_DMA
+	tristate "MSM BAM DMA support"
+	depends on ARCH_MSM
+	select DMA_ENGINE
+	---help---
+	  Enable support for the MSM BAM DMA controller.  This controller
+	  provides DMA capabilities for a variety of on-chip devices.
+
 endif
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index db89035..69be540 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -41,3 +41,4 @@  obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o
 obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o
 obj-$(CONFIG_TI_CPPI41) += cppi41.o
 obj-$(CONFIG_K3_DMA) += k3dma.o
+obj-$(CONFIG_MSM_BAM_DMA) += msm_bam_dma.o
diff --git a/drivers/dma/msm_bam_dma.c b/drivers/dma/msm_bam_dma.c
new file mode 100644
index 0000000..d16bf94
--- /dev/null
+++ b/drivers/dma/msm_bam_dma.c
@@ -0,0 +1,840 @@ 
+/*
+ * MSM BAM DMA engine driver
+ *
+ * Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_dma.h>
+#include <linux/clk.h>
+#include <linux/msm_bam_dma.h>
+
+#include "dmaengine.h"
+#include "msm_bam_dma_priv.h"
+
+
+/*
+ * bam_alloc_chan - Allocate channel resources for DMA channel.
+ * @chan: specified channel
+ *
+ * This function allocates the FIFO descriptor memory and resets the channel
+ */
+static int bam_alloc_chan(struct dma_chan *chan)
+{
+	struct bam_chan *bchan = to_bam_chan(chan);
+	struct bam_device *bdev = bchan->device;
+	u32 val;
+	union bam_pipe_ctrl pctrl;
+
+	/* check for channel activity */
+	pctrl.value = ioread32(bdev->regs + BAM_P_CTRL(bchan->id));
+	if (pctrl.bits.p_en) {
+		dev_err(bdev->dev, "channel already active\n");
+		return -EINVAL;
+	}
+
+	/* allocate FIFO descriptor space */
+	bchan->fifo_virt = (struct bam_desc_hw *)dma_alloc_coherent(bdev->dev,
+				BAM_DESC_FIFO_SIZE, &bchan->fifo_phys,
+				GFP_KERNEL);
+
+	if (!bchan->fifo_virt) {
+		dev_err(bdev->dev, "Failed to allocate descriptor fifo\n");
+		return -ENOMEM;
+	}
+
+	/* reset channel */
+	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
+	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
+
+	/* configure fifo address/size in bam channel registers */
+	iowrite32(bchan->fifo_phys, bdev->regs +
+			BAM_P_DESC_FIFO_ADDR(bchan->id));
+	iowrite32(BAM_DESC_FIFO_SIZE, bdev->regs +
+			BAM_P_FIFO_SIZES(bchan->id));
+
+	/* unmask and enable interrupts for defined EE, bam and error irqs */
+	iowrite32(BAM_IRQ_MSK, bdev->regs + BAM_IRQ_SRCS_EE(bchan->ee));
+
+	/* enable the per pipe interrupts, enable EOT and INT irqs */
+	iowrite32(P_DEFAULT_IRQS_EN, bdev->regs + BAM_P_IRQ_EN(bchan->id));
+
+	/* unmask the specific pipe and EE combo */
+	val = ioread32(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
+	val |= 1 << bchan->id;
+	iowrite32(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
+
+	/* set fixed direction and mode, then enable channel */
+	pctrl.value = 0;
+	pctrl.bits.p_direction =
+		(bchan->bam_slave.slave.direction == DMA_DEV_TO_MEM) ?
+			BAM_PIPE_PRODUCER : BAM_PIPE_CONSUMER;
+	pctrl.bits.p_sys_mode = BAM_PIPE_MODE_SYSTEM;
+	pctrl.bits.p_en = 1;
+
+	iowrite32(pctrl.value, bdev->regs + BAM_P_CTRL(bchan->id));
+
+	/* set desc threshold */
+	/* do bookkeeping for tracking used EEs, used during IRQ handling */
+	set_bit(bchan->ee, &bdev->enabled_ees);
+
+	bchan->head = 0;
+	bchan->tail = 0;
+
+	return 0;
+}
+
+/*
+ * bam_free_chan - Frees dma resources associated with specific channel
+ * @chan: specified channel
+ *
+ * Free the allocated fifo descriptor memory and channel resources
+ *
+ */
+static void bam_free_chan(struct dma_chan *chan)
+{
+	struct bam_chan *bchan = to_bam_chan(chan);
+	struct bam_device *bdev = bchan->device;
+	u32 val;
+
+	/* reset channel */
+	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
+	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
+
+	dma_free_coherent(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
+				bchan->fifo_phys);
+
+	/* mask irq for pipe/channel */
+	val = ioread32(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
+	val &= ~(1 << bchan->id);
+	iowrite32(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bchan->ee));
+
+	/* disable irq */
+	iowrite32(0, bdev->regs + BAM_P_IRQ_EN(bchan->id));
+
+	clear_bit(bchan->ee, &bdev->enabled_ees);
+}
+
+/*
+ * bam_slave_config - set slave configuration for channel
+ * @chan: dma channel
+ * @cfg: slave configuration
+ *
+ * Sets slave configuration for channel
+ * Only allow setting direction once.  BAM channels are unidirectional
+ * and the direction is set in hardware.
+ *
+ */
+static void bam_slave_config(struct bam_chan *bchan,
+		struct bam_dma_slave_config *bcfg)
+{
+	struct bam_device *bdev = bchan->device;
+
+	bchan->bam_slave.desc_threshold = bcfg->desc_threshold;
+
+	/* set desc threshold */
+	iowrite32(bcfg->desc_threshold, bdev->regs + BAM_DESC_CNT_TRSHLD);
+}
+
+/*
+ * bam_start_dma - loads up descriptors and starts dma
+ * @chan: dma channel
+ *
+ * Loads descriptors into descriptor fifo and starts dma controller
+ *
+ * NOTE: Must hold channel lock
+*/
+static void bam_start_dma(struct bam_chan *bchan)
+{
+	struct bam_device *bdev = bchan->device;
+	struct bam_async_desc *async_desc, *_adesc;
+	u32 curr_len, val;
+	u32 num_processed = 0;
+
+	if (list_empty(&bchan->pending))
+		return;
+
+	curr_len = (bchan->head <= bchan->tail) ?
+			bchan->tail - bchan->head :
+			MAX_DESCRIPTORS - bchan->head + bchan->tail;
+
+	list_for_each_entry_safe(async_desc, _adesc, &bchan->pending, node) {
+
+		/* bust out if we are out of room */
+		if (async_desc->num_desc + curr_len > MAX_DESCRIPTORS)
+			break;
+
+		/* copy descriptors into fifo */
+		if (bchan->tail + async_desc->num_desc > MAX_DESCRIPTORS) {
+			u32 partial = MAX_DESCRIPTORS - bchan->tail;
+
+			memcpy(&bchan->fifo_virt[bchan->tail], async_desc->desc,
+				partial * sizeof(struct bam_desc_hw));
+			memcpy(bchan->fifo_virt, &async_desc->desc[partial],
+				(async_desc->num_desc - partial) *
+					sizeof(struct bam_desc_hw));
+		} else {
+			memcpy(&bchan->fifo_virt[bchan->tail], async_desc->desc,
+				async_desc->num_desc *
+				sizeof(struct bam_desc_hw));
+		}
+
+		num_processed++;
+		bchan->tail += async_desc->num_desc;
+		bchan->tail %= MAX_DESCRIPTORS;
+		curr_len += async_desc->num_desc;
+
+		list_move_tail(&async_desc->node, &bchan->active);
+	}
+
+	/* bail if we didn't queue anything to the active queue */
+	if (!num_processed)
+		return;
+
+	async_desc = list_first_entry(&bchan->active, struct bam_async_desc,
+			node);
+
+	val = ioread32(bdev->regs + BAM_P_SW_OFSTS(bchan->id));
+	val &= P_SW_OFSTS_MASK;
+
+	/* kick off dma by forcing a write event to the pipe */
+	iowrite32((bchan->tail * sizeof(struct bam_desc_hw)),
+			bdev->regs + BAM_P_EVNT_REG(bchan->id));
+}
+
+/*
+ * bam_tx_submit - Adds transaction to channel pending queue
+ * @tx: transaction to queue
+ *
+ * Adds dma transaction to pending queue for channel
+ *
+*/
+static dma_cookie_t bam_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct bam_chan *bchan = to_bam_chan(tx->chan);
+	struct bam_async_desc *desc = to_bam_async_desc(tx);
+	dma_cookie_t cookie;
+
+	spin_lock_bh(&bchan->lock);
+
+	cookie = dma_cookie_assign(tx);
+	list_add_tail(&desc->node, &bchan->pending);
+
+	spin_unlock_bh(&bchan->lock);
+
+	return cookie;
+}
+
+/*
+ * bam_prep_slave_sg - Prep slave sg transaction
+ *
+ * @chan: dma channel
+ * @sgl: scatter gather list
+ * @sg_len: length of sg
+ * @direction: DMA transfer direction
+ * @flags: DMA flags
+ * @context: transfer context (unused)
+ */
+static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan,
+	struct scatterlist *sgl, unsigned int sg_len,
+	enum dma_transfer_direction direction, unsigned long flags,
+	void *context)
+{
+	struct bam_chan *bchan = to_bam_chan(chan);
+	struct bam_device *bdev = bchan->device;
+	struct bam_async_desc *async_desc = NULL;
+	struct scatterlist *sg;
+	u32 i;
+	struct bam_desc_hw *desc;
+
+
+	if (!is_slave_direction(direction)) {
+		dev_err(bdev->dev, "invalid dma direction\n");
+		goto err_out;
+	}
+
+	/* direction has to match pipe configuration from the slave config */
+	if (direction != bchan->bam_slave.slave.direction) {
+		dev_err(bdev->dev,
+				"trans does not match channel configuration\n");
+		goto err_out;
+	}
+
+	/* make sure number of descriptors will fit within the fifo */
+	if (sg_len > MAX_DESCRIPTORS) {
+		dev_err(bdev->dev, "not enough fifo descriptor space\n");
+		goto err_out;
+	}
+
+	/* allocate enough room to accomodate the number of entries */
+	async_desc = kzalloc(sizeof(*async_desc) +
+			(sg_len * sizeof(struct bam_desc_hw)), GFP_KERNEL);
+
+	if (!async_desc) {
+		dev_err(bdev->dev, "failed to allocate async descriptor\n");
+		goto err_out;
+	}
+
+	async_desc->num_desc = sg_len;
+	async_desc->dir = (direction == DMA_DEV_TO_MEM) ? BAM_PIPE_PRODUCER :
+				BAM_PIPE_CONSUMER;
+
+	/* fill in descriptors, align hw descriptor to 8 bytes */
+	desc = async_desc->desc;
+	for_each_sg(sgl, sg, sg_len, i) {
+		if (sg_dma_len(sg) > BAM_MAX_DATA_SIZE) {
+			dev_err(bdev->dev, "segment exceeds max size\n");
+			goto err_out;
+		}
+
+		desc->addr = sg_dma_address(sg);
+		desc->size = sg_dma_len(sg);
+		desc++;
+	}
+
+	/* set EOT flag on last descriptor, we want IRQ on completion */
+	async_desc->desc[async_desc->num_desc-1].flags |= DESC_FLAG_EOT;
+
+	dma_async_tx_descriptor_init(&async_desc->txd, chan);
+	async_desc->txd.tx_submit = bam_tx_submit;
+
+	return &async_desc->txd;
+
+err_out:
+	kfree(async_desc);
+	return NULL;
+}
+
+/*
+ * bam_dma_terminate_all - terminate all transactions
+ * @chan: dma channel
+ *
+ * Idles channel and dequeues and frees all transactions
+ * No callbacks are done
+ *
+*/
+static void bam_dma_terminate_all(struct dma_chan *chan)
+{
+	struct bam_chan *bchan = to_bam_chan(chan);
+	struct bam_device *bdev = bchan->device;
+	LIST_HEAD(desc_cleanup);
+	struct bam_async_desc *desc, *_desc;
+
+	spin_lock_bh(&bchan->lock);
+
+	/* reset channel */
+	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
+	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
+
+	/* grab all the descriptors and free them */
+	list_splice_tail_init(&bchan->pending, &desc_cleanup);
+	list_splice_tail_init(&bchan->active, &desc_cleanup);
+
+	list_for_each_entry_safe(desc, _desc, &desc_cleanup, node)
+		kfree(desc);
+
+	spin_unlock_bh(&bchan->lock);
+}
+
+/*
+ * bam_control - DMA device control
+ * @chan: dma channel
+ * @cmd: control cmd
+ * @arg: cmd argument
+ *
+ * Perform DMA control command
+ *
+*/
+static int bam_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+	unsigned long arg)
+{
+	struct bam_chan *bchan = to_bam_chan(chan);
+	struct bam_device *bdev = bchan->device;
+	struct bam_dma_slave_config *bconfig;
+	int ret = 0;
+
+	switch (cmd) {
+	case DMA_PAUSE:
+		spin_lock_bh(&bchan->lock);
+		iowrite32(1, bdev->regs + BAM_P_HALT(bchan->id));
+		spin_unlock_bh(&bchan->lock);
+		break;
+	case DMA_RESUME:
+		spin_lock_bh(&bchan->lock);
+		iowrite32(0, bdev->regs + BAM_P_HALT(bchan->id));
+		spin_unlock_bh(&bchan->lock);
+		break;
+	case DMA_TERMINATE_ALL:
+		bam_dma_terminate_all(chan);
+		break;
+	case DMA_SLAVE_CONFIG:
+		bconfig = (struct bam_dma_slave_config *)arg;
+		bam_slave_config(bchan, bconfig);
+		break;
+	default:
+		ret = -EIO;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * process_irqs_per_ee - processes the interrupts for a specific ee
+ * @bam: bam controller
+ * @ee: execution environment
+ *
+ * This function processes the interrupts for a given execution environment
+ *
+ */
+static u32 process_irqs_per_ee(struct bam_device *bdev,
+	u32 ee)
+{
+	u32 i, srcs, stts, pipe_stts;
+	u32 clr_mask = 0;
+
+
+	srcs = ioread32(bdev->regs + BAM_IRQ_SRCS_EE(ee));
+
+	/* check for general bam error */
+	if (srcs & BAM_IRQ) {
+		stts = ioread32(bdev->regs + BAM_IRQ_STTS);
+		clr_mask |= stts;
+	}
+
+	/* check pipes / channels */
+	if (srcs & P_IRQ) {
+
+		for (i = 0; i < bdev->num_channels; i++) {
+			if (srcs & (1 << i)) {
+				/* clear pipe irq */
+				pipe_stts = ioread32(bdev->regs +
+					BAM_P_IRQ_STTS(i));
+
+				iowrite32(pipe_stts, bdev->regs +
+					BAM_P_IRQ_CLR(i));
+
+				/* schedule channel work */
+				tasklet_schedule(&bdev->channels[i].tasklet);
+			}
+		}
+	}
+
+	return clr_mask;
+}
+
+/*
+ * bam_dma_irq - irq handler for bam controller
+ * @irq: IRQ of interrupt
+ * @data: callback data
+ *
+ * IRQ handler for the bam controller
+ */
+static irqreturn_t bam_dma_irq(int irq, void *data)
+{
+	struct bam_device *bdev = (struct bam_device *)data;
+	u32 clr_mask = 0;
+	u32 i;
+
+
+	for (i = 0; i < bdev->num_ees; i++) {
+		if (test_bit(i, &bdev->enabled_ees))
+			clr_mask |= process_irqs_per_ee(bdev, i);
+	}
+
+	iowrite32(clr_mask, bdev->regs + BAM_IRQ_CLR);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * bam_tx_status - returns status of transaction
+ * @chan: dma channel
+ * @cookie: transaction cookie
+ * @txstate: DMA transaction state
+ *
+ * Return status of dma transaction
+ */
+static enum dma_status bam_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+		struct dma_tx_state *txstate)
+{
+	return dma_cookie_status(chan, cookie, txstate);
+}
+
+/*
+ * dma_tasklet - DMA IRQ tasklet
+ * @data: tasklet argument (bam controller structure)
+ *
+ * Sets up next DMA operation and then processes all completed transactions
+ */
+static void dma_tasklet(unsigned long data)
+{
+	struct bam_chan *bchan = (struct bam_chan *)data;
+	struct bam_async_desc *desc, *_desc;
+	LIST_HEAD(desc_cleanup);
+	u32 fifo_length;
+
+
+	spin_lock_bh(&bchan->lock);
+
+	if (list_empty(&bchan->active))
+		goto out;
+
+	fifo_length = (bchan->head <= bchan->tail) ?
+		bchan->tail - bchan->head :
+		MAX_DESCRIPTORS - bchan->head + bchan->tail;
+
+	/* only process those which are currently done */
+	list_for_each_entry_safe(desc, _desc, &bchan->active, node) {
+		if (desc->num_desc > fifo_length)
+			break;
+
+		dma_cookie_complete(&desc->txd);
+
+		list_move_tail(&desc->node, &desc_cleanup);
+		fifo_length -= desc->num_desc;
+		bchan->head += desc->num_desc;
+		bchan->head %= MAX_DESCRIPTORS;
+	}
+
+out:
+	/* kick off processing of any queued descriptors */
+	bam_start_dma(bchan);
+
+	spin_unlock_bh(&bchan->lock);
+
+	/* process completed descriptors */
+	list_for_each_entry_safe(desc, _desc, &desc_cleanup, node) {
+		if (desc->txd.callback)
+			desc->txd.callback(desc->txd.callback_param);
+
+		kfree(desc);
+	}
+}
+
+/*
+ * bam_issue_pending - starts pending transactions
+ * @chan: dma channel
+ *
+ * Calls tasklet directly which in turn starts any pending transactions
+ */
+static void bam_issue_pending(struct dma_chan *chan)
+{
+	dma_tasklet((unsigned long)chan);
+}
+
+struct bam_filter_args {
+	struct dma_device *dev;
+	u32 id;
+	u32 ee;
+	u32 dir;
+};
+
+static bool bam_dma_filter(struct dma_chan *chan, void *data)
+{
+	struct bam_filter_args *args = data;
+	struct bam_chan *bchan = to_bam_chan(chan);
+
+	if (args->dev == chan->device &&
+		args->id == bchan->id) {
+
+		/* we found the channel, so lets set the EE and dir */
+		bchan->ee = args->ee;
+		bchan->bam_slave.slave.direction = args->dir ?
+				DMA_DEV_TO_MEM : DMA_MEM_TO_DEV;
+		return true;
+	}
+
+	return false;
+}
+
+static struct dma_chan *bam_dma_xlate(struct of_phandle_args *dma_spec,
+		struct of_dma *of)
+{
+	struct bam_filter_args args;
+	dma_cap_mask_t cap;
+
+	if (dma_spec->args_count != 3)
+		return NULL;
+
+	args.dev = of->of_dma_data;
+	args.id = dma_spec->args[0];
+	args.ee = dma_spec->args[1];
+	args.dir = dma_spec->args[2];
+
+	dma_cap_zero(cap);
+	dma_cap_set(DMA_SLAVE, cap);
+
+	return dma_request_channel(cap, bam_dma_filter, &args);
+}
+
+/*
+ * bam_init
+ * @bdev: bam device
+ *
+ * Initialization helper for global bam registers
+ */
+static int bam_init(struct bam_device *bdev)
+{
+	union bam_num_pipes num_pipes;
+	union bam_ctrl ctrl;
+	union bam_cnfg_bits cnfg_bits;
+	union bam_revision revision;
+
+	/* read versioning information */
+	revision.value = ioread32(bdev->regs + BAM_REVISION);
+	bdev->num_ees = revision.bits.num_ees;
+
+	num_pipes.value = ioread32(bdev->regs + BAM_NUM_PIPES);
+	bdev->num_channels = num_pipes.bits.bam_num_pipes;
+
+	/* s/w reset bam */
+	/* after reset all pipes are disabled and idle */
+	ctrl.value = ioread32(bdev->regs + BAM_CTRL);
+	ctrl.bits.bam_sw_rst = 1;
+	iowrite32(ctrl.value, bdev->regs + BAM_CTRL);
+	ctrl.bits.bam_sw_rst = 0;
+	iowrite32(ctrl.value, bdev->regs + BAM_CTRL);
+
+	/* enable bam */
+	ctrl.bits.bam_en = 1;
+	iowrite32(ctrl.value, bdev->regs + BAM_CTRL);
+
+	/* set descriptor threshhold, start with 4 bytes */
+	iowrite32(DEFAULT_CNT_THRSHLD, bdev->regs + BAM_DESC_CNT_TRSHLD);
+
+	/* set config bits for h/w workarounds */
+	/* Enable all workarounds except BAM_FULL_PIPE */
+	cnfg_bits.value = 0xffffffff;
+	cnfg_bits.bits.obsolete = 0;
+	cnfg_bits.bits.obsolete2 = 0;
+	cnfg_bits.bits.reserved = 0;
+	cnfg_bits.bits.reserved2 = 0;
+	cnfg_bits.bits.bam_full_pipe = 0;
+	iowrite32(cnfg_bits.value, bdev->regs + BAM_CNFG_BITS);
+
+	/* enable irqs for errors */
+	iowrite32(BAM_ERROR_EN | BAM_HRESP_ERR_EN, bdev->regs + BAM_IRQ_EN);
+	return 0;
+}
+
+static void bam_channel_init(struct bam_device *bdev, struct bam_chan *bchan,
+	u32 index)
+{
+	bchan->id = index;
+	bchan->common.device = &bdev->common;
+	bchan->device = bdev;
+	spin_lock_init(&bchan->lock);
+
+	INIT_LIST_HEAD(&bchan->pending);
+	INIT_LIST_HEAD(&bchan->active);
+
+	dma_cookie_init(&bchan->common);
+	list_add_tail(&bchan->common.device_node,
+		&bdev->common.channels);
+
+	tasklet_init(&bchan->tasklet, dma_tasklet, (unsigned long)bchan);
+
+	/* reset channel - just to be sure */
+	iowrite32(1, bdev->regs + BAM_P_RST(bchan->id));
+	iowrite32(0, bdev->regs + BAM_P_RST(bchan->id));
+}
+
+static int bam_dma_probe(struct platform_device *pdev)
+{
+	struct bam_device *bdev;
+	int err, i;
+
+	bdev = kzalloc(sizeof(*bdev), GFP_KERNEL);
+	if (!bdev) {
+		dev_err(&pdev->dev, "insufficient memory for private data\n");
+		err = -ENOMEM;
+		goto err_no_bdev;
+	}
+
+	bdev->dev = &pdev->dev;
+	dev_set_drvdata(bdev->dev, bdev);
+
+	bdev->regs = of_iomap(pdev->dev.of_node, 0);
+	if (!bdev->regs) {
+		dev_err(bdev->dev, "unable to ioremap base\n");
+		err = -ENOMEM;
+		goto err_free_bamdev;
+	}
+
+	bdev->irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+	if (bdev->irq == NO_IRQ) {
+		dev_err(bdev->dev, "unable to map irq\n");
+		err = -EINVAL;
+		goto err_unmap_mem;
+	}
+
+	bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk");
+	if (IS_ERR(bdev->bamclk)) {
+		err = PTR_ERR(bdev->bamclk);
+		goto err_free_irq;
+	}
+
+	err = clk_prepare_enable(bdev->bamclk);
+	if (err) {
+		dev_err(bdev->dev, "failed to prepare/enable clock");
+		goto err_free_irq;
+	}
+
+	err = request_irq(bdev->irq, &bam_dma_irq, IRQF_TRIGGER_HIGH, "bam_dma",
+				bdev);
+	if (err) {
+		dev_err(bdev->dev, "error requesting irq\n");
+		err = -EINVAL;
+		goto err_disable_clk;
+	}
+
+	if (bam_init(bdev)) {
+		dev_err(bdev->dev, "cannot initialize bam device\n");
+		err = -EINVAL;
+		goto err_disable_clk;
+	}
+
+	bdev->channels = kzalloc(sizeof(*bdev->channels) * bdev->num_channels,
+				GFP_KERNEL);
+
+	if (!bdev->channels) {
+		dev_err(bdev->dev, "unable to allocate channels\n");
+		err = -ENOMEM;
+		goto err_disable_clk;
+	}
+
+	/* allocate and initialize channels */
+	INIT_LIST_HEAD(&bdev->common.channels);
+
+	for (i = 0; i < bdev->num_channels; i++)
+		bam_channel_init(bdev, &bdev->channels[i], i);
+
+	/* set max dma segment size */
+	bdev->common.dev = bdev->dev;
+	bdev->common.dev->dma_parms = &bdev->dma_parms;
+	if (dma_set_max_seg_size(bdev->common.dev, BAM_MAX_DATA_SIZE)) {
+		dev_err(bdev->dev, "cannot set maximum segment size\n");
+		goto err_disable_clk;
+	}
+
+	/* set capabilities */
+	dma_cap_zero(bdev->common.cap_mask);
+	dma_cap_set(DMA_SLAVE, bdev->common.cap_mask);
+
+	/* initialize dmaengine apis */
+	bdev->common.device_alloc_chan_resources = bam_alloc_chan;
+	bdev->common.device_free_chan_resources = bam_free_chan;
+	bdev->common.device_prep_slave_sg = bam_prep_slave_sg;
+	bdev->common.device_control = bam_control;
+	bdev->common.device_issue_pending = bam_issue_pending;
+	bdev->common.device_tx_status = bam_tx_status;
+	bdev->common.dev = bdev->dev;
+
+	dma_async_device_register(&bdev->common);
+
+	if (pdev->dev.of_node) {
+		err = of_dma_controller_register(pdev->dev.of_node,
+				bam_dma_xlate, &bdev->common);
+
+		if (err) {
+			dev_err(bdev->dev, "failed to register of_dma\n");
+			goto err_unregister_dma;
+		}
+	}
+
+	return 0;
+
+err_unregister_dma:
+	dma_async_device_unregister(&bdev->common);
+err_free_irq:
+	free_irq(bdev->irq, bdev);
+err_disable_clk:
+	clk_disable_unprepare(bdev->bamclk);
+err_unmap_mem:
+	iounmap(bdev->regs);
+err_free_bamdev:
+	if (bdev)
+		kfree(bdev->channels);
+	kfree(bdev);
+err_no_bdev:
+	return err;
+}
+
+static int bam_dma_remove(struct platform_device *pdev)
+{
+	struct bam_device *bdev;
+
+	bdev = dev_get_drvdata(&pdev->dev);
+	dev_set_drvdata(&pdev->dev, NULL);
+
+	dma_async_device_unregister(&bdev->common);
+
+	if (bdev) {
+		free_irq(bdev->irq, bdev);
+		clk_disable_unprepare(bdev->bamclk);
+		iounmap(bdev->regs);
+		kfree(bdev->channels);
+	}
+
+	kfree(bdev);
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id bam_of_match[] = {
+	{ .compatible = "qcom,bam", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, bam_of_match);
+#endif
+
+static struct platform_driver bam_dma_driver = {
+	.probe = bam_dma_probe,
+	.remove = bam_dma_remove,
+	.driver = {
+		.name = "bam-dma-engine",
+		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(bam_of_match),
+	},
+};
+
+static int __init bam_dma_init(void)
+{
+	return platform_driver_register(&bam_dma_driver);
+}
+
+static void __exit bam_dma_exit(void)
+{
+	return platform_driver_unregister(&bam_dma_driver);
+}
+
+arch_initcall(bam_dma_init);
+module_exit(bam_dma_exit);
+
+MODULE_AUTHOR("Andy Gross <agross@codeaurora.org>");
+MODULE_DESCRIPTION("MSM BAM DMA engine driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/msm_bam_dma_priv.h b/drivers/dma/msm_bam_dma_priv.h
new file mode 100644
index 0000000..4d6a10c7
--- /dev/null
+++ b/drivers/dma/msm_bam_dma_priv.h
@@ -0,0 +1,286 @@ 
+/*
+ * Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#ifndef __MSM_BAM_DMA_PRIV_H__
+#define __MSM_BAM_DMA_PRIV_H__
+
+#include <linux/dmaengine.h>
+
+enum bam_channel_mode {
+	BAM_PIPE_MODE_BAM2BAM = 0,	/* BAM to BAM aka device to device */
+	BAM_PIPE_MODE_SYSTEM,		/* BAM to/from System Memory */
+};
+
+enum bam_channel_dir {
+	BAM_PIPE_CONSUMER = 0,	/* channel reads from data-fifo or memory */
+	BAM_PIPE_PRODUCER,	/* channel writes to data-fifo or memory */
+};
+
+struct bam_desc_hw {
+	u32 addr;		/* Buffer physical address */
+	u32 size:16;		/* Buffer size in bytes */
+	u32 flags:16;
+};
+
+#define DESC_FLAG_INT	(1<<15)
+#define DESC_FLAG_EOT	(1<<14)
+#define DESC_FLAG_EOB	(1<<13)
+
+struct bam_async_desc {
+	struct list_head node;
+	struct dma_async_tx_descriptor txd;
+	u32 num_desc;
+	enum bam_channel_dir dir;
+	u32 fifo_pos;
+	struct bam_desc_hw desc[0];
+};
+
+static inline struct bam_async_desc *to_bam_async_desc(
+		struct dma_async_tx_descriptor *txd)
+{
+	return container_of(txd, struct bam_async_desc, txd);
+}
+
+
+#define BAM_CTRL			0x0000
+#define BAM_REVISION			0x0004
+#define BAM_SW_REVISION			0x0080
+#define BAM_NUM_PIPES			0x003C
+#define BAM_TIMER			0x0040
+#define BAM_TIMER_CTRL			0x0044
+#define BAM_DESC_CNT_TRSHLD		0x0008
+#define BAM_IRQ_SRCS			0x000C
+#define BAM_IRQ_SRCS_MSK		0x0010
+#define BAM_IRQ_SRCS_UNMASKED		0x0030
+#define BAM_IRQ_STTS			0x0014
+#define BAM_IRQ_CLR			0x0018
+#define BAM_IRQ_EN			0x001C
+#define BAM_CNFG_BITS			0x007C
+#define BAM_IRQ_SRCS_EE(x)		(0x0800 + ((x) * 0x80))
+#define BAM_IRQ_SRCS_MSK_EE(x)		(0x0804 + ((x) * 0x80))
+#define BAM_P_CTRL(x)			(0x1000 + ((x) * 0x1000))
+#define BAM_P_RST(x)			(0x1004 + ((x) * 0x1000))
+#define BAM_P_HALT(x)			(0x1008 + ((x) * 0x1000))
+#define BAM_P_IRQ_STTS(x)		(0x1010 + ((x) * 0x1000))
+#define BAM_P_IRQ_CLR(x)		(0x1014 + ((x) * 0x1000))
+#define BAM_P_IRQ_EN(x)			(0x1018 + ((x) * 0x1000))
+#define BAM_P_EVNT_DEST_ADDR(x)		(0x182C + ((x) * 0x1000))
+#define BAM_P_EVNT_REG(x)		(0x1818 + ((x) * 0x1000))
+#define BAM_P_SW_OFSTS(x)		(0x1800 + ((x) * 0x1000))
+#define BAM_P_DATA_FIFO_ADDR(x)		(0x1824 + ((x) * 0x1000))
+#define BAM_P_DESC_FIFO_ADDR(x)		(0x181C + ((x) * 0x1000))
+#define BAM_P_EVNT_TRSHLD(x)		(0x1828 + ((x) * 0x1000))
+#define BAM_P_FIFO_SIZES(x)		(0x1820 + ((x) * 0x1000))
+
+union bam_ctrl {
+	u32 value;
+	struct {
+		u32 bam_sw_rst:1;
+		u32 bam_en:1;
+		u32 reserved3:2;
+		u32 bam_en_accum:1;
+		u32 testbus_sel:7;
+		u32 reserved2:1;
+		u32 bam_desc_cache_sel:2;
+		u32 bam_cached_desc_store:1;
+		u32 ibc_disable:1;
+		u32 reserved1:15;
+	} bits;
+};
+
+union bam_revision {
+	u32 value;
+	struct {
+		u32 revision:8;
+		u32 num_ees:4;
+		u32 reserved1:1;
+		u32 ce_buffer_size:1;
+		u32 axi_active:1;
+		u32 use_vmidmt:1;
+		u32 secured:1;
+		u32 bam_has_no_bypass:1;
+		u32 high_frequency_bam:1;
+		u32 inactiv_tmrs_exst:1;
+		u32 num_inactiv_tmrs:1;
+		u32 desc_cache_depth:2;
+		u32 cmd_desc_en:1;
+		u32 inactiv_tmr_base:8;
+	} bits;
+};
+
+union bam_sw_revision {
+	u32 value;
+	struct {
+		u32 step:16;
+		u32 minor:12;
+		u32 major:4;
+	} bits;
+};
+
+union bam_num_pipes {
+	u32 value;
+	struct {
+		u32 bam_num_pipes:8;
+		u32 reserved:8;
+		u32 periph_non_pipe_grp:8;
+		u32 bam_non_pipe_grp:8;
+	} bits;
+};
+
+union bam_irq_srcs_msk {
+	u32 value;
+	struct {
+		u32 p_irq_msk:31;
+		u32 bam_irq_msk:1;
+	} bits;
+};
+
+union bam_cnfg_bits {
+	u32 value;
+	struct {
+		u32 obsolete:2;
+		u32 bam_pipe_cnfg:1;
+		u32 obsolete2:1;
+		u32 reserved:7;
+		u32 bam_full_pipe:1;
+		u32 bam_no_ext_p_rst:1;
+		u32 bam_ibc_disable:1;
+		u32 bam_sb_clk_req:1;
+		u32 bam_psm_csw_req:1;
+		u32 bam_psm_p_res:1;
+		u32 bam_au_p_res:1;
+		u32 bam_si_p_res:1;
+		u32 bam_wb_p_res:1;
+		u32 bam_wb_blk_csw:1;
+		u32 bam_wb_csw_ack_idl:1;
+		u32 bam_wb_retr_svpnt:1;
+		u32 bam_wb_dsc_avl_p_rst:1;
+		u32 bam_reg_p_en:1;
+		u32 bam_psm_p_hd_data:1;
+		u32 bam_au_accumed:1;
+		u32 bam_cd_enable:1;
+		u32 reserved2:4;
+	} bits;
+};
+
+union bam_pipe_ctrl {
+	u32 value;
+	struct {
+		u32 reserved:1;
+		u32 p_en:1;
+		u32 reserved2:1;
+		u32 p_direction:1;
+		u32 p_sys_strm:1;
+		u32 p_sys_mode:1;
+		u32 p_auto_eob:1;
+		u32 p_auto_eob_sel:2;
+		u32 p_prefetch_limit:2;
+		u32 p_write_nwd:1;
+		u32 reserved3:4;
+		u32 p_lock_group:5;
+		u32 reserved4:11;
+	} bits;
+};
+
+/* BAM_DESC_CNT_TRSHLD */
+#define CNT_TRSHLD		0xffff
+#define DEFAULT_CNT_THRSHLD	0x4
+
+/* BAM_IRQ_SRCS */
+#define BAM_IRQ			(0x1 << 31)
+#define P_IRQ			0x7fffffff
+
+/* BAM_IRQ_SRCS_MSK */
+#define BAM_IRQ_MSK		(0x1 << 31)
+#define P_IRQ_MSK		0x7fffffff
+
+/* BAM_IRQ_STTS */
+#define BAM_TIMER_IRQ		(0x1 << 4)
+#define BAM_EMPTY_IRQ		(0x1 << 3)
+#define BAM_ERROR_IRQ		(0x1 << 2)
+#define BAM_HRESP_ERR_IRQ	(0x1 << 1)
+
+/* BAM_IRQ_CLR */
+#define BAM_TIMER_CLR		(0x1 << 4)
+#define BAM_EMPTY_CLR		(0x1 << 3)
+#define BAM_ERROR_CLR		(0x1 << 2)
+#define BAM_HRESP_ERR_CLR	(0x1 << 1)
+
+/* BAM_IRQ_EN */
+#define BAM_TIMER_EN		(0x1 << 4)
+#define BAM_EMPTY_EN		(0x1 << 3)
+#define BAM_ERROR_EN		(0x1 << 2)
+#define BAM_HRESP_ERR_EN	(0x1 << 1)
+
+/* BAM_P_IRQ_EN */
+#define P_PRCSD_DESC_EN		(0x1 << 0)
+#define P_TIMER_EN		(0x1 << 1)
+#define P_WAKE_EN		(0x1 << 2)
+#define P_OUT_OF_DESC_EN	(0x1 << 3)
+#define P_ERR_EN		(0x1 << 4)
+#define P_TRNSFR_END_EN		(0x1 << 5)
+#define P_DEFAULT_IRQS_EN	(P_PRCSD_DESC_EN | P_ERR_EN | P_TRNSFR_END_EN)
+
+/* BAM_P_SW_OFSTS */
+#define P_SW_OFSTS_MASK		0xffff
+
+#define BAM_DESC_FIFO_SIZE	SZ_32K
+#define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1)
+#define BAM_MAX_DATA_SIZE	(SZ_32K - 8)
+
+struct bam_chan {
+	struct dma_chan common;
+	struct bam_device *device;
+	u32 id;
+	u32 ee;
+	bool idle;
+	struct bam_dma_slave_config bam_slave;	/* runtime configuration */
+
+	struct tasklet_struct tasklet;
+	spinlock_t lock;		/* descriptor lock */
+
+	struct list_head pending;	/* desc pending queue */
+	struct list_head active;	/* desc running queue */
+
+	struct bam_desc_hw *fifo_virt;
+	dma_addr_t fifo_phys;
+
+	/* fifo markers */
+	unsigned short head;		/* start of active descriptor entries */
+	unsigned short tail;		/* end of active descriptor entries */
+};
+
+static inline struct bam_chan *to_bam_chan(struct dma_chan *common)
+{
+	return container_of(common, struct bam_chan, common);
+}
+
+struct bam_device {
+	void __iomem *regs;
+	struct device *dev;
+	struct dma_device common;
+	struct device_dma_parameters dma_parms;
+	struct bam_chan *channels;
+	u32 num_channels;
+	u32 num_ees;
+	unsigned long enabled_ees;
+	u32 feature;
+	int irq;
+	struct clk *bamclk;
+};
+
+static inline struct bam_device *to_bam_device(struct dma_device *common)
+{
+	return container_of(common, struct bam_device, common);
+}
+
+#endif /* __MSM_BAM_DMA_PRIV_H__ */
diff --git a/include/linux/msm_bam_dma.h b/include/linux/msm_bam_dma.h
new file mode 100644
index 0000000..a358dba
--- /dev/null
+++ b/include/linux/msm_bam_dma.h
@@ -0,0 +1,27 @@ 
+/*
+ * msm_bam_dma.h - MSM BAM DMA engine Driver
+ *
+ * Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#ifndef __MSM_BAM_DMA_H__
+#define __MSM_BAM_DMA_H__
+
+#include <linux/dmaengine.h>
+
+struct bam_dma_slave_config {
+	struct dma_slave_config slave;
+
+	/* BAM specific slave config starts here */
+	u16 desc_threshold;
+};
+
+#endif /* __MSM_BAM_DMA_H__ */