diff mbox

dma: mv_xor_v2: new driver

Message ID 1455523083-25506-1-git-send-email-thomas.petazzoni@free-electrons.com (mailing list archive)
State Changes Requested
Headers show

Commit Message

Thomas Petazzoni Feb. 15, 2016, 7:58 a.m. UTC
The new mv_xor_v2 driver supports the XOR engines found in the 64-bits
ARM from Marvell of the Armada 7K and Armada 8K family. This XOR
engine is a completely new hardware block, entirely different from the
one used on previous Marvell Armada platforms, which use the existing
mv_xor driver.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 .../devicetree/bindings/dma/mv-xor-v2.txt          |  19 +
 drivers/dma/Kconfig                                |  13 +
 drivers/dma/Makefile                               |   1 +
 drivers/dma/mv_xor_v2.c                            | 880 +++++++++++++++++++++
 4 files changed, 913 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/dma/mv-xor-v2.txt
 create mode 100644 drivers/dma/mv_xor_v2.c

Comments

kernel test robot Feb. 15, 2016, 9:09 a.m. UTC | #1
Hi Thomas,

[auto build test ERROR on robh/for-next]
[also build test ERROR on v4.5-rc4 next-20160215]
[if your patch is applied to the wrong git tree, please drop us a note to help improving the system]

url:    https://github.com/0day-ci/linux/commits/Thomas-Petazzoni/dma-mv_xor_v2-new-driver/20160215-160120
base:   https://git.kernel.org/pub/scm/linux/kernel/git/robh/linux for-next
config: openrisc-allyesconfig (attached as .config)
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=openrisc 

All errors (new ones prefixed by >>):

   In file included from kernel/irq/chip.c:14:0:
>> include/linux/msi.h:174:21: fatal error: asm/msi.h: No such file or directory
   compilation terminated.

vim +174 include/linux/msi.h

c2791b80 Yijing Wang    2014-11-11  168  	void (*teardown_irq)(struct msi_controller *chip, unsigned int irq);
0cbdcfcf Thierry Reding 2013-08-09  169  };
0cbdcfcf Thierry Reding 2013-08-09  170  
f3cf8bb0 Jiang Liu      2014-11-12  171  #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
d9109698 Jiang Liu      2014-11-15  172  
aeeb5965 Jiang Liu      2014-11-15  173  #include <linux/irqhandler.h>
d9109698 Jiang Liu      2014-11-15 @174  #include <asm/msi.h>
d9109698 Jiang Liu      2014-11-15  175  
f3cf8bb0 Jiang Liu      2014-11-12  176  struct irq_domain;
552c494a Marc Zyngier   2015-11-23  177  struct irq_domain_ops;

:::::: The code at line 174 was first introduced by commit
:::::: d9109698be6e7439e6082aa00d79d4556114739b genirq: Introduce msi_domain_alloc/free_irqs()

:::::: TO: Jiang Liu <jiang.liu@linux.intel.com>
:::::: CC: Thomas Gleixner <tglx@linutronix.de>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot Feb. 15, 2016, 9:34 a.m. UTC | #2
Hi Thomas,

[auto build test WARNING on robh/for-next]
[also build test WARNING on v4.5-rc4 next-20160215]
[if your patch is applied to the wrong git tree, please drop us a note to help improving the system]

url:    https://github.com/0day-ci/linux/commits/Thomas-Petazzoni/dma-mv_xor_v2-new-driver/20160215-160120
base:   https://git.kernel.org/pub/scm/linux/kernel/git/robh/linux for-next


coccinelle warnings: (new ones prefixed by >>)

>> drivers/dma/mv_xor_v2.c:870:3-8: No need to set .owner here. The core will do it.

Please review and possibly fold the followup patch.

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
--
To unsubscribe from this list: send the line "unsubscribe dmaengine" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Petazzoni Feb. 15, 2016, 9:50 a.m. UTC | #3
Marc,

This doesn't seem to be an issue introduced by my patch itself, but
rather a problem with the MSI subsystem itself. Is this already a known
issue (possibly already fixed), or should I investigate?

Thanks!

Thomas

On Mon, 15 Feb 2016 17:09:27 +0800, kbuild test robot wrote:
> Hi Thomas,
> 
> [auto build test ERROR on robh/for-next]
> [also build test ERROR on v4.5-rc4 next-20160215]
> [if your patch is applied to the wrong git tree, please drop us a note to help improving the system]
> 
> url:    https://github.com/0day-ci/linux/commits/Thomas-Petazzoni/dma-mv_xor_v2-new-driver/20160215-160120
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/robh/linux for-next
> config: openrisc-allyesconfig (attached as .config)
> reproduce:
>         wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         # save the attached .config to linux build tree
>         make.cross ARCH=openrisc 
> 
> All errors (new ones prefixed by >>):
> 
>    In file included from kernel/irq/chip.c:14:0:
> >> include/linux/msi.h:174:21: fatal error: asm/msi.h: No such file or directory
>    compilation terminated.
> 
> vim +174 include/linux/msi.h
> 
> c2791b80 Yijing Wang    2014-11-11  168  	void (*teardown_irq)(struct msi_controller *chip, unsigned int irq);
> 0cbdcfcf Thierry Reding 2013-08-09  169  };
> 0cbdcfcf Thierry Reding 2013-08-09  170  
> f3cf8bb0 Jiang Liu      2014-11-12  171  #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
> d9109698 Jiang Liu      2014-11-15  172  
> aeeb5965 Jiang Liu      2014-11-15  173  #include <linux/irqhandler.h>
> d9109698 Jiang Liu      2014-11-15 @174  #include <asm/msi.h>
> d9109698 Jiang Liu      2014-11-15  175  
> f3cf8bb0 Jiang Liu      2014-11-12  176  struct irq_domain;
> 552c494a Marc Zyngier   2015-11-23  177  struct irq_domain_ops;
> 
> :::::: The code at line 174 was first introduced by commit
> :::::: d9109698be6e7439e6082aa00d79d4556114739b genirq: Introduce msi_domain_alloc/free_irqs()
> 
> :::::: TO: Jiang Liu <jiang.liu@linux.intel.com>
> :::::: CC: Thomas Gleixner <tglx@linutronix.de>
> 
> ---
> 0-DAY kernel test infrastructure                Open Source Technology Center
> https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot Feb. 15, 2016, 9:56 a.m. UTC | #4
Hi Thomas,

[auto build test ERROR on robh/for-next]
[also build test ERROR on v4.5-rc4 next-20160215]
[if your patch is applied to the wrong git tree, please drop us a note to help improving the system]

url:    https://github.com/0day-ci/linux/commits/Thomas-Petazzoni/dma-mv_xor_v2-new-driver/20160215-160120
base:   https://git.kernel.org/pub/scm/linux/kernel/git/robh/linux for-next
config: sparc64-allyesconfig (attached as .config)
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=sparc64 

All errors (new ones prefixed by >>):

   In file included from include/linux/of_pci.h:5:0,
                    from drivers/of/of_pci.c:6:
>> include/linux/msi.h:203:10: error: unknown type name 'msi_alloc_info_t'
             msi_alloc_info_t *arg);
             ^
   include/linux/msi.h:207:9: error: unknown type name 'msi_alloc_info_t'
            msi_alloc_info_t *arg);
            ^
   include/linux/msi.h:216:12: error: unknown type name 'msi_alloc_info_t'
               msi_alloc_info_t *arg);
               ^
   include/linux/msi.h:217:22: error: unknown type name 'msi_alloc_info_t'
     void  (*msi_finish)(msi_alloc_info_t *arg, int retval);
                         ^
   include/linux/msi.h:218:20: error: unknown type name 'msi_alloc_info_t'
     void  (*set_desc)(msi_alloc_info_t *arg,
                       ^
   include/linux/msi.h:286:18: error: unknown type name 'msi_alloc_info_t'
           int nvec, msi_alloc_info_t *args);
                     ^
   include/linux/msi.h:288:29: error: unknown type name 'msi_alloc_info_t'
            int virq, int nvec, msi_alloc_info_t *args);
                                ^

vim +/msi_alloc_info_t +203 include/linux/msi.h

d9109698 Jiang Liu        2014-11-15  197   * @msi_check, @msi_prepare, @msi_finish, @set_desc and @handle_error
1d1e8cdc Thomas Petazzoni 2015-12-21  198   * are callbacks used by msi_domain_alloc_irqs() and related
d9109698 Jiang Liu        2014-11-15  199   * interfaces which are based on msi_desc.
f3cf8bb0 Jiang Liu        2014-11-12  200   */
f3cf8bb0 Jiang Liu        2014-11-12  201  struct msi_domain_ops {
aeeb5965 Jiang Liu        2014-11-15  202  	irq_hw_number_t	(*get_hwirq)(struct msi_domain_info *info,
aeeb5965 Jiang Liu        2014-11-15 @203  				     msi_alloc_info_t *arg);
f3cf8bb0 Jiang Liu        2014-11-12  204  	int		(*msi_init)(struct irq_domain *domain,
f3cf8bb0 Jiang Liu        2014-11-12  205  				    struct msi_domain_info *info,
f3cf8bb0 Jiang Liu        2014-11-12  206  				    unsigned int virq, irq_hw_number_t hwirq,

:::::: The code at line 203 was first introduced by commit
:::::: aeeb59657c35da64068336c20068da237f41ab76 genirq: Provide default callbacks for msi_domain_ops

:::::: TO: Jiang Liu <jiang.liu@linux.intel.com>
:::::: CC: Thomas Gleixner <tglx@linutronix.de>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Marc Zyngier Feb. 15, 2016, 9:59 a.m. UTC | #5
Hi Thomas,

On 15/02/16 09:50, Thomas Petazzoni wrote:
> Marc,
> 
> This doesn't seem to be an issue introduced by my patch itself, but
> rather a problem with the MSI subsystem itself. Is this already a known
> issue (possibly already fixed), or should I investigate?

Seems like something is selecting GENERIC_MSI_IRQ_DOMAIN on a system
that doesn't implement the required bits.

It may be that we need to implement something like
HAS_GENERIC_MSI_IRQ_DOMAIN, and make GENERIC_MSI_IRQ_DOMAIN conditional
on that. Or just whitelist the architectures that do implement this.

Any chance you could have a closer look?

Thanks,

	M.

> 
> Thanks!
> 
> Thomas
> 
> On Mon, 15 Feb 2016 17:09:27 +0800, kbuild test robot wrote:
>> Hi Thomas,
>>
>> [auto build test ERROR on robh/for-next]
>> [also build test ERROR on v4.5-rc4 next-20160215]
>> [if your patch is applied to the wrong git tree, please drop us a note to help improving the system]
>>
>> url:    https://github.com/0day-ci/linux/commits/Thomas-Petazzoni/dma-mv_xor_v2-new-driver/20160215-160120
>> base:   https://git.kernel.org/pub/scm/linux/kernel/git/robh/linux for-next
>> config: openrisc-allyesconfig (attached as .config)
>> reproduce:
>>         wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
>>         chmod +x ~/bin/make.cross
>>         # save the attached .config to linux build tree
>>         make.cross ARCH=openrisc 
>>
>> All errors (new ones prefixed by >>):
>>
>>    In file included from kernel/irq/chip.c:14:0:
>>>> include/linux/msi.h:174:21: fatal error: asm/msi.h: No such file or directory
>>    compilation terminated.
>>
>> vim +174 include/linux/msi.h
>>
>> c2791b80 Yijing Wang    2014-11-11  168  	void (*teardown_irq)(struct msi_controller *chip, unsigned int irq);
>> 0cbdcfcf Thierry Reding 2013-08-09  169  };
>> 0cbdcfcf Thierry Reding 2013-08-09  170  
>> f3cf8bb0 Jiang Liu      2014-11-12  171  #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
>> d9109698 Jiang Liu      2014-11-15  172  
>> aeeb5965 Jiang Liu      2014-11-15  173  #include <linux/irqhandler.h>
>> d9109698 Jiang Liu      2014-11-15 @174  #include <asm/msi.h>
>> d9109698 Jiang Liu      2014-11-15  175  
>> f3cf8bb0 Jiang Liu      2014-11-12  176  struct irq_domain;
>> 552c494a Marc Zyngier   2015-11-23  177  struct irq_domain_ops;
>>
>> :::::: The code at line 174 was first introduced by commit
>> :::::: d9109698be6e7439e6082aa00d79d4556114739b genirq: Introduce msi_domain_alloc/free_irqs()
>>
>> :::::: TO: Jiang Liu <jiang.liu@linux.intel.com>
>> :::::: CC: Thomas Gleixner <tglx@linutronix.de>
>>
>> ---
>> 0-DAY kernel test infrastructure                Open Source Technology Center
>> https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
> 
> 
>
Thomas Petazzoni Feb. 15, 2016, 10:58 a.m. UTC | #6
Hello,

On Mon, 15 Feb 2016 09:59:27 +0000, Marc Zyngier wrote:

> Seems like something is selecting GENERIC_MSI_IRQ_DOMAIN on a system
> that doesn't implement the required bits.
> 
> It may be that we need to implement something like
> HAS_GENERIC_MSI_IRQ_DOMAIN, and make GENERIC_MSI_IRQ_DOMAIN conditional
> on that. Or just whitelist the architectures that do implement this.
> 
> Any chance you could have a closer look?

I'll have a look. However, in the mean time, I will probably make this
new driver depend on ARM64 so that it does not depend on other patches
touching the MSI mechanism.

Thanks,

Thomas
Rob Herring (Arm) Feb. 22, 2016, 2:53 a.m. UTC | #7
On Mon, Feb 15, 2016 at 08:58:03AM +0100, Thomas Petazzoni wrote:
> The new mv_xor_v2 driver supports the XOR engines found in the 64-bits
> ARM from Marvell of the Armada 7K and Armada 8K family. This XOR
> engine is a completely new hardware block, entirely different from the
> one used on previous Marvell Armada platforms, which use the existing
> mv_xor driver.
> 
> Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
> ---
>  .../devicetree/bindings/dma/mv-xor-v2.txt          |  19 +
>  drivers/dma/Kconfig                                |  13 +
>  drivers/dma/Makefile                               |   1 +
>  drivers/dma/mv_xor_v2.c                            | 880 +++++++++++++++++++++
>  4 files changed, 913 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/dma/mv-xor-v2.txt
>  create mode 100644 drivers/dma/mv_xor_v2.c
> 
> diff --git a/Documentation/devicetree/bindings/dma/mv-xor-v2.txt b/Documentation/devicetree/bindings/dma/mv-xor-v2.txt
> new file mode 100644
> index 0000000..0a03dcf
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/dma/mv-xor-v2.txt
> @@ -0,0 +1,19 @@
> +* Marvell XOR v2 engines
> +
> +Required properties:
> +- compatible: Should be "marvell,mv-xor-v2"

and an SoC specific compatible please.

"marvell,mv" is a bit redundant.

> +- reg: Should contain registers location and length (two sets)
> +    the first set is the DMA registers
> +    the second set is the global registers
> +- msi-parent: Phandle to the MSI-capable interrupt controller used for
> +  interrupts.
> +
> +Example:
> +
> +	xor0@400000 {
> +		compatible = "marvell,mv-xor-v2";
> +		reg = <0x400000 0x1000>,
> +		      <0x410000 0x1000>;
> +		msi-parent = <&gic_v2m0>;
> +		dma-coherent;
> +	};

--
To unsubscribe from this list: send the line "unsubscribe dmaengine" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vinod Koul Feb. 22, 2016, 3:27 a.m. UTC | #8
On Mon, Feb 15, 2016 at 08:58:03AM +0100, Thomas Petazzoni wrote:
> diff --git a/Documentation/devicetree/bindings/dma/mv-xor-v2.txt b/Documentation/devicetree/bindings/dma/mv-xor-v2.txt
> new file mode 100644
> index 0000000..0a03dcf
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/dma/mv-xor-v2.txt
> @@ -0,0 +1,19 @@
> +* Marvell XOR v2 engines
> +
> +Required properties:
> +- compatible: Should be "marvell,mv-xor-v2"
> +- reg: Should contain registers location and length (two sets)
> +    the first set is the DMA registers
> +    the second set is the global registers
> +- msi-parent: Phandle to the MSI-capable interrupt controller used for
> +  interrupts.
> +
> +Example:
> +
> +	xor0@400000 {
> +		compatible = "marvell,mv-xor-v2";
> +		reg = <0x400000 0x1000>,
> +		      <0x410000 0x1000>;
> +		msi-parent = <&gic_v2m0>;
> +		dma-coherent;
> +	};

This should be a separate patch :)

> +/* DMA Engine Registers */
> +#define DMA_DESQ_BALR_OFF	0x000
> +#define DMA_DESQ_BAHR_OFF	0x004
> +#define DMA_DESQ_SIZE_OFF	0x008
> +#define DMA_DESQ_DONE_OFF	0x00C
> +#define   DMA_DESQ_DONE_PENDING_MASK		0x7FFF
> +#define   DMA_DESQ_DONE_PENDING_SHIFT		0
> +#define   DMA_DESQ_DONE_READ_PTR_MASK		0x1FFF
> +#define   DMA_DESQ_DONE_READ_PTR_SHIFT		16
> +#define DMA_DESQ_ARATTR_OFF	0x010
> +#define   DMA_DESQ_ATTR_CACHE_MASK		0x3F3F
> +#define   DMA_DESQ_ATTR_OUTER_SHAREABLE		0x202
> +#define   DMA_DESQ_ATTR_CACHEABLE		0x3C3C
> +#define DMA_IMSG_CDAT_OFF	0x014
> +#define DMA_IMSG_THRD_OFF	0x018
> +#define   DMA_IMSG_THRD_MASK			0x7FFF
> +#define   DMA_IMSG_THRD_SHIFT			0x0
> +#define DMA_DESQ_AWATTR_OFF	0x01C
> +  /* Same flags as DMA_DESQ_ARATTR_OFF */
> +#define DMA_DESQ_ALLOC_OFF	0x04C
> +#define   DMA_DESQ_ALLOC_WRPTR_MASK		0xFFFF
> +#define   DMA_DESQ_ALLOC_WRPTR_SHIFT		16
> +#define DMA_IMSG_BALR_OFF	0x050
> +#define DMA_IMSG_BAHR_OFF	0x054
> +#define DMA_DESQ_CTRL_OFF	0x100
> +#define	  DMA_DESQ_CTRL_32B			1
> +#define   DMA_DESQ_CTRL_128B			7
> +#define DMA_DESQ_STOP_OFF	0x800
> +#define DMA_DESQ_DEALLOC_OFF	0x804
> +#define DMA_DESQ_ADD_OFF	0x808

Please namespace these and others. Something like MRVL_XOR_XXX or anyother
patter you like would be fine...

> +
> +/* XOR Global registers */
> +#define GLOB_BW_CTRL		0x4
> +#define   GLOB_BW_CTRL_NUM_OSTD_RD_SHIFT	0
> +#define   GLOB_BW_CTRL_NUM_OSTD_RD_VAL		64
> +#define   GLOB_BW_CTRL_NUM_OSTD_WR_SHIFT	8
> +#define   GLOB_BW_CTRL_NUM_OSTD_WR_VAL		8
> +#define   GLOB_BW_CTRL_RD_BURST_LEN_SHIFT	12
> +#define   GLOB_BW_CTRL_RD_BURST_LEN_VAL		4
> +#define   GLOB_BW_CTRL_WR_BURST_LEN_SHIFT	16
> +#define	  GLOB_BW_CTRL_WR_BURST_LEN_VAL		4
> +#define GLOB_PAUSE		0x014
> +#define   GLOB_PAUSE_AXI_TIME_DIS_VAL		0x8
> +#define GLOB_SYS_INT_CAUSE	0x200
> +#define GLOB_SYS_INT_MASK	0x204
> +#define GLOB_MEM_INT_CAUSE	0x220
> +#define GLOB_MEM_INT_MASK	0x224
> +
> +#define MV_XOR_V2_MIN_DESC_SIZE		32
> +#define MV_XOR_V2_EXT_DESC_SIZE		128
> +
> +#define MV_XOR_V2_DESC_RESERVED_SIZE	12
> +#define MV_XOR_V2_DESC_BUFF_D_ADDR_SIZE	12
> +
> +#define MV_XOR_V2_CMD_LINE_NUM_MAX_D_BUF 8

These do look fine!

> +
> +/* descriptors queue size */
> +#define MV_XOR_V2_MAX_DESC_NUM	1024

Is this hardware defined?


> +static void mv_xor_v2_set_data_buffers(struct mv_xor_v2_device *xor_dev,
> +					struct mv_xor_v2_descriptor *desc,
> +					dma_addr_t src, int index)
> +{
> +	int arr_index = ((index >> 1) * 3);
> +
> +	/*
> +	 * fill the buffer's addresses to the descriptor
> +	 * The format of the buffers address for 2 sequential buffers X and X+1:

space around + please.

> +	 *  First word: Buffer-DX-Address-Low[31:0]
> +	 *  Second word:Buffer-DX+1-Address-Low[31:0]
> +	 *  Third word: DX+1-Buffer-Address-High[47:32] [31:16]
> +	 *		DX-Buffer-Address-High[47:32] [15:0]
> +	 */
> +	if ((index & 0x1) == 0) {
> +		desc->data_buff_addr[arr_index] = lower_32_bits(src);
> +
> +		/* Clear lower 16-bits */
> +		desc->data_buff_addr[arr_index + 2] &= ~0xFFFF;
> +
> +		/* Set them if we have a 64 bits DMA address */
> +		if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT))
> +			desc->data_buff_addr[arr_index + 2] |=
> +				upper_32_bits(src) & 0xFFFF;

So why should it depend on how kernel is configured?

> +	} else {
> +		desc->data_buff_addr[arr_index + 1] =
> +			lower_32_bits(src);
> +
> +		/* Clear upper 16-bits */
> +		desc->data_buff_addr[arr_index + 2] &= ~0xFFFF0000;
> +
> +		/* Set them if we have a 64 bits DMA address */
> +		if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT))
> +			desc->data_buff_addr[arr_index + 2] |=
> +				(upper_32_bits(src) & 0xFFFF) << 16;
> +	}
> +}
> +
> +/*
> + * Return the next available index in the DESQ.
> + */
> +static inline int mv_xor_v2_get_desq_write_ptr(struct mv_xor_v2_device *xor_dev)

Pls wrap with 80 chars wherever it is reasonable

> +/*
> + * Allocate resources for a channel
> + */
> +static int mv_xor_v2_alloc_chan_resources(struct dma_chan *chan)
> +{
> +	/* nothing to be done here */
> +	return 0;
> +}

No need for empty alloc

> +
> +/*
> + * Free resources of a channel
> + */
> +void mv_xor_v2_free_chan_resources(struct dma_chan *chan)
> +{
> +	/* nothing to be done here */
> +}

and free as well :)


> +static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data)
> +{
> +	struct mv_xor_v2_device *xor_dev = data;
> +
> +	/*
> +	 * Update IMSG threshold, to disable new IMSG interrupts until
> +	 * end of the tasklet
> +	 */
> +	mv_xor_v2_set_imsg_thrd(xor_dev, MV_XOR_V2_MAX_DESC_NUM);

You don't want to check the source of interrupt?

> +static dma_cookie_t
> +mv_xor_v2_tx_submit(struct dma_async_tx_descriptor *tx)
> +{
> +	int desq_ptr;
> +	void *dest_hw_desc;
> +	dma_cookie_t cookie;
> +	struct mv_xor_v2_sw_desc *sw_desc =
> +		container_of(tx, struct mv_xor_v2_sw_desc, async_tx);
> +	struct mv_xor_v2_device *xor_dev =
> +		container_of(tx->chan, struct mv_xor_v2_device, dmachan);
> +
> +	dev_dbg(xor_dev->dmadev.dev,
> +		"%s sw_desc %p: async_tx %p\n",
> +		__func__, sw_desc, &sw_desc->async_tx);
> +
> +	/* assign coookie */
> +	spin_lock_bh(&xor_dev->cookie_lock);
> +	cookie = dma_cookie_assign(tx);
> +	spin_unlock_bh(&xor_dev->cookie_lock);
> +
> +	/* lock enqueue DESCQ */
> +	spin_lock_bh(&xor_dev->push_lock);

Why not a generic channel lock which is used in submit, issue_pending and
tasklet. What is the reason for opting different locks?

> +
> +	/* get the next available slot in the DESQ */
> +	desq_ptr = mv_xor_v2_get_desq_write_ptr(xor_dev);
> +
> +	/* copy the HW descriptor from the SW descriptor to the DESQ */
> +	dest_hw_desc = ((void *)xor_dev->hw_desq_virt +

cast to and away from void are not required

> +			(xor_dev->desc_size * desq_ptr));
> +
> +	memcpy(dest_hw_desc, &sw_desc->hw_desc, xor_dev->desc_size);
> +
> +	/* update the DMA Engine with the new descriptor */
> +	mv_xor_v2_add_desc_to_desq(xor_dev, 1);
> +
> +	/* unlock enqueue DESCQ */
> +	spin_unlock_bh(&xor_dev->push_lock);

and if IIUC, you are pushing this to HW as well, that is not quite right if
thats the case. We need to do this in issue_pending

> +static struct dma_async_tx_descriptor *
> +mv_xor_v2_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
> +		       unsigned int src_cnt, size_t len, unsigned long flags)
> +{
> +	struct mv_xor_v2_sw_desc	*sw_desc;
> +	struct mv_xor_v2_descriptor	*hw_descriptor;
> +	struct mv_xor_v2_device		*xor_dev;
> +	int i;
> +
> +	BUG_ON(src_cnt > MV_XOR_V2_CMD_LINE_NUM_MAX_D_BUF || src_cnt < 1);

why BUG_ON, is the system unusable after this?

> +/*
> + * poll for a transaction completion
> + */
> +static enum dma_status mv_xor_v2_tx_status(struct dma_chan *chan,
> +		dma_cookie_t cookie, struct dma_tx_state *txstate)
> +{
> +	/* return the transaction status */
> +	return dma_cookie_status(chan, cookie, txstate);

why not assign this as you status callback?

> +static void mv_xor_v2_issue_pending(struct dma_chan *chan)
> +{
> +	struct mv_xor_v2_device *xor_dev =
> +		container_of(chan, struct mv_xor_v2_device, dmachan);
> +
> +	/* Activate the channel */
> +	writel(0, xor_dev->dma_base + DMA_DESQ_STOP_OFF);

what if channel is already running?


> +static void mv_xor_v2_tasklet(unsigned long data)
> +{
> +	struct mv_xor_v2_device *xor_dev = (struct mv_xor_v2_device *) data;
> +	int pending_ptr, num_of_pending, i;
> +	struct mv_xor_v2_descriptor *next_pending_hw_desc = NULL;
> +	struct mv_xor_v2_sw_desc *next_pending_sw_desc = NULL;
> +
> +	dev_dbg(xor_dev->dmadev.dev, "%s %d\n", __func__, __LINE__);
> +
> +	/* get thepending descriptors parameters */

space after the pls

> +static struct platform_driver mv_xor_v2_driver = {
> +	.probe		= mv_xor_v2_probe,
> +	.remove		= mv_xor_v2_remove,
> +	.driver		= {
> +		.owner	= THIS_MODULE,

I dont think we need this
Thomas Petazzoni Feb. 22, 2016, 7:21 a.m. UTC | #9
Rob,

On Sun, 21 Feb 2016 20:53:49 -0600, Rob Herring wrote:

> > +Required properties:
> > +- compatible: Should be "marvell,mv-xor-v2"
> 
> and an SoC specific compatible please.

So you mean *both* marvell,mv-xor-v2 and a SoC specific compatible?

> "marvell,mv" is a bit redundant.

True. I guess marvell,xor-v2 will do.

Thomas
Thomas Petazzoni Feb. 22, 2016, 9:16 a.m. UTC | #10
Hello Vinod,

On Mon, 22 Feb 2016 08:57:30 +0530, Vinod Koul wrote:
> On Mon, Feb 15, 2016 at 08:58:03AM +0100, Thomas Petazzoni wrote:
> > diff --git a/Documentation/devicetree/bindings/dma/mv-xor-v2.txt b/Documentation/devicetree/bindings/dma/mv-xor-v2.txt
> > new file mode 100644
> > index 0000000..0a03dcf
> > --- /dev/null
> > +++ b/Documentation/devicetree/bindings/dma/mv-xor-v2.txt
> > @@ -0,0 +1,19 @@
> > +* Marvell XOR v2 engines
> > +
> > +Required properties:
> > +- compatible: Should be "marvell,mv-xor-v2"
> > +- reg: Should contain registers location and length (two sets)
> > +    the first set is the DMA registers
> > +    the second set is the global registers
> > +- msi-parent: Phandle to the MSI-capable interrupt controller used for
> > +  interrupts.
> > +
> > +Example:
> > +
> > +	xor0@400000 {
> > +		compatible = "marvell,mv-xor-v2";
> > +		reg = <0x400000 0x1000>,
> > +		      <0x410000 0x1000>;
> > +		msi-parent = <&gic_v2m0>;
> > +		dma-coherent;
> > +	};
> 
> This should be a separate patch :)

This is really a personal preference, and different maintainers have
different opinions on the matter. However, if your preference is to
have the DT binding documentation as a separate patch, then I'll split
it, no problem :)

> 
> > +/* DMA Engine Registers */
> > +#define DMA_DESQ_BALR_OFF	0x000
> > +#define DMA_DESQ_BAHR_OFF	0x004
> > +#define DMA_DESQ_SIZE_OFF	0x008
> > +#define DMA_DESQ_DONE_OFF	0x00C
> > +#define   DMA_DESQ_DONE_PENDING_MASK		0x7FFF
> > +#define   DMA_DESQ_DONE_PENDING_SHIFT		0
> > +#define   DMA_DESQ_DONE_READ_PTR_MASK		0x1FFF
> > +#define   DMA_DESQ_DONE_READ_PTR_SHIFT		16
> > +#define DMA_DESQ_ARATTR_OFF	0x010
> > +#define   DMA_DESQ_ATTR_CACHE_MASK		0x3F3F
> > +#define   DMA_DESQ_ATTR_OUTER_SHAREABLE		0x202
> > +#define   DMA_DESQ_ATTR_CACHEABLE		0x3C3C
> > +#define DMA_IMSG_CDAT_OFF	0x014
> > +#define DMA_IMSG_THRD_OFF	0x018
> > +#define   DMA_IMSG_THRD_MASK			0x7FFF
> > +#define   DMA_IMSG_THRD_SHIFT			0x0
> > +#define DMA_DESQ_AWATTR_OFF	0x01C
> > +  /* Same flags as DMA_DESQ_ARATTR_OFF */
> > +#define DMA_DESQ_ALLOC_OFF	0x04C
> > +#define   DMA_DESQ_ALLOC_WRPTR_MASK		0xFFFF
> > +#define   DMA_DESQ_ALLOC_WRPTR_SHIFT		16
> > +#define DMA_IMSG_BALR_OFF	0x050
> > +#define DMA_IMSG_BAHR_OFF	0x054
> > +#define DMA_DESQ_CTRL_OFF	0x100
> > +#define	  DMA_DESQ_CTRL_32B			1
> > +#define   DMA_DESQ_CTRL_128B			7
> > +#define DMA_DESQ_STOP_OFF	0x800
> > +#define DMA_DESQ_DEALLOC_OFF	0x804
> > +#define DMA_DESQ_ADD_OFF	0x808
> 
> Please namespace these and others. Something like MRVL_XOR_XXX or anyother
> patter you like would be fine...

The issue is that with such name-spacing, the names get really long,
and it's hard to stay with the 80 characters limit. But OK, I'll try to
see what I can do.

> > +/* descriptors queue size */
> > +#define MV_XOR_V2_MAX_DESC_NUM	1024
> 
> Is this hardware defined?

No, it is a software choice. The hardware supports up to 2^14
descriptors, i.e 16384, at least from my understanding of the datasheet.

> > +static void mv_xor_v2_set_data_buffers(struct mv_xor_v2_device *xor_dev,
> > +					struct mv_xor_v2_descriptor *desc,
> > +					dma_addr_t src, int index)
> > +{
> > +	int arr_index = ((index >> 1) * 3);
> > +
> > +	/*
> > +	 * fill the buffer's addresses to the descriptor
> > +	 * The format of the buffers address for 2 sequential buffers X and X+1:
> 
> space around + please.

ACK.

> > +	 *  First word: Buffer-DX-Address-Low[31:0]
> > +	 *  Second word:Buffer-DX+1-Address-Low[31:0]
> > +	 *  Third word: DX+1-Buffer-Address-High[47:32] [31:16]
> > +	 *		DX-Buffer-Address-High[47:32] [15:0]
> > +	 */
> > +	if ((index & 0x1) == 0) {
> > +		desc->data_buff_addr[arr_index] = lower_32_bits(src);
> > +
> > +		/* Clear lower 16-bits */
> > +		desc->data_buff_addr[arr_index + 2] &= ~0xFFFF;
> > +
> > +		/* Set them if we have a 64 bits DMA address */
> > +		if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT))
> > +			desc->data_buff_addr[arr_index + 2] |=
> > +				upper_32_bits(src) & 0xFFFF;
> 
> So why should it depend on how kernel is configured?

Because using upper_32_bits() on a 32-bits type seems wrong? You can do
a git grep for CONFIG_ARCH_DMA_ADDR_T_64BIT, and see that it is already
used in drivers/dma/sh/rcar-dmac.c, drivers/gpu/drm/tegra/dc.c or
drivers/iommu/tegra-smmu.c.

drivers/dma/sh/rcar-dmac.c uses it for exactly the same reason as me:
they need to know if dma_addr_t is 32 bits or 64 bits, and if it's 64
bits, then they need to take the upper 32 bits and feed them to some
register.

> > +/*
> > + * Return the next available index in the DESQ.
> > + */
> > +static inline int mv_xor_v2_get_desq_write_ptr(struct mv_xor_v2_device *xor_dev)
> 
> Pls wrap with 80 chars wherever it is reasonable

Sure.

> > +/*
> > + * Allocate resources for a channel
> > + */
> > +static int mv_xor_v2_alloc_chan_resources(struct dma_chan *chan)
> > +{
> > +	/* nothing to be done here */
> > +	return 0;
> > +}
> 
> No need for empty alloc
> 
> > +
> > +/*
> > + * Free resources of a channel
> > + */
> > +void mv_xor_v2_free_chan_resources(struct dma_chan *chan)
> > +{
> > +	/* nothing to be done here */
> > +}
> 
> and free as well :)

Ah, cool, less useless code, I like that! Will fix.

> > +static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data)
> > +{
> > +	struct mv_xor_v2_device *xor_dev = data;
> > +
> > +	/*
> > +	 * Update IMSG threshold, to disable new IMSG interrupts until
> > +	 * end of the tasklet
> > +	 */
> > +	mv_xor_v2_set_imsg_thrd(xor_dev, MV_XOR_V2_MAX_DESC_NUM);
> 
> You don't want to check the source of interrupt?

To protect against what? An improper MSI message that ends up firing
this interrupt for now reason? But only kernel space stuff (or
privileged code in user-space) can write to the MSI triggering
registers, so we should be able to trust that such an improper MSI
message will not arrive.

That being said, I can add a read of the pending descriptors number,
and if it's 0, then bail out with IRQ_NONE. Is that what you were
thinking about?

> > +static dma_cookie_t
> > +mv_xor_v2_tx_submit(struct dma_async_tx_descriptor *tx)
> > +{
> > +	int desq_ptr;
> > +	void *dest_hw_desc;
> > +	dma_cookie_t cookie;
> > +	struct mv_xor_v2_sw_desc *sw_desc =
> > +		container_of(tx, struct mv_xor_v2_sw_desc, async_tx);
> > +	struct mv_xor_v2_device *xor_dev =
> > +		container_of(tx->chan, struct mv_xor_v2_device, dmachan);
> > +
> > +	dev_dbg(xor_dev->dmadev.dev,
> > +		"%s sw_desc %p: async_tx %p\n",
> > +		__func__, sw_desc, &sw_desc->async_tx);
> > +
> > +	/* assign coookie */
> > +	spin_lock_bh(&xor_dev->cookie_lock);
> > +	cookie = dma_cookie_assign(tx);
> > +	spin_unlock_bh(&xor_dev->cookie_lock);
> > +
> > +	/* lock enqueue DESCQ */
> > +	spin_lock_bh(&xor_dev->push_lock);
> 
> Why not a generic channel lock which is used in submit, issue_pending and
> tasklet. What is the reason for opting different locks?

Yeah, agreed. Until I get real performance data, it's probably overkill
to use multiple spinlocks here. I'll rework with one spinlock only.

> > +	/* get the next available slot in the DESQ */
> > +	desq_ptr = mv_xor_v2_get_desq_write_ptr(xor_dev);
> > +
> > +	/* copy the HW descriptor from the SW descriptor to the DESQ */
> > +	dest_hw_desc = ((void *)xor_dev->hw_desq_virt +
> 
> cast to and away from void are not required

Well in this case, it is actually needed, because xor_dev->hw_desq_virt
is a pointer to a struct mv_xor_v2_descriptor, but we want to do
byte-based arithmetic, and not sizeof(struct mv_xor_v2_descriptor)
based arithmetic. Though I admit it's a bit ugly, I'll try to see if we
can't simply use normal sizeof(struct mv_xor_v2_descriptor)
based arithmetic.

> 
> > +			(xor_dev->desc_size * desq_ptr));
> > +
> > +	memcpy(dest_hw_desc, &sw_desc->hw_desc, xor_dev->desc_size);
> > +
> > +	/* update the DMA Engine with the new descriptor */
> > +	mv_xor_v2_add_desc_to_desq(xor_dev, 1);
> > +
> > +	/* unlock enqueue DESCQ */
> > +	spin_unlock_bh(&xor_dev->push_lock);
> 
> and if IIUC, you are pushing this to HW as well, that is not quite right if
> thats the case. We need to do this in issue_pending

issue_pending is already starting the DMA engine, which will process
the descriptors that have been queued in ->tx_submit.

Of course, if the DMA engine is already running, then the descriptors
might be processed right after they are added in the queue in
->tx_submit().

We are using the same scheme in mv_xor.c (not to say that mv_xor.c
doesn't need to be fixed, but it explains why we're using this logic in
mv_xor_v2.c).

> > +static struct dma_async_tx_descriptor *
> > +mv_xor_v2_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
> > +		       unsigned int src_cnt, size_t len, unsigned long flags)
> > +{
> > +	struct mv_xor_v2_sw_desc	*sw_desc;
> > +	struct mv_xor_v2_descriptor	*hw_descriptor;
> > +	struct mv_xor_v2_device		*xor_dev;
> > +	int i;
> > +
> > +	BUG_ON(src_cnt > MV_XOR_V2_CMD_LINE_NUM_MAX_D_BUF || src_cnt < 1);
> 
> why BUG_ON, is the system unusable after this?

Well, the XOR engine cannot do XOR with 0 source buffers, or with more
than MV_XOR_V2_CMD_LINE_NUM_MAX_D_BUF.

But I see that I'm already setting max_xor in struct dma_device, so one
can assume that ->prep_dma_xor() will not be called with src_cnt == or
src_cnt > max_xor, so this BUG_ON can be removed.

> > +/*
> > + * poll for a transaction completion
> > + */
> > +static enum dma_status mv_xor_v2_tx_status(struct dma_chan *chan,
> > +		dma_cookie_t cookie, struct dma_tx_state *txstate)
> > +{
> > +	/* return the transaction status */
> > +	return dma_cookie_status(chan, cookie, txstate);
> 
> why not assign this as you status callback?

Right, will do.

> > +static void mv_xor_v2_issue_pending(struct dma_chan *chan)
> > +{
> > +	struct mv_xor_v2_device *xor_dev =
> > +		container_of(chan, struct mv_xor_v2_device, dmachan);
> > +
> > +	/* Activate the channel */
> > +	writel(0, xor_dev->dma_base + DMA_DESQ_STOP_OFF);
> 
> what if channel is already running?

It will continue to run. Datasheet says: "Reset this bit to enable the
XOR unit (DESQ). Set this bit to disable (or Stop) the XOR unit." So as
long as we write 0, the engine will continue processing descriptors.

> > +static void mv_xor_v2_tasklet(unsigned long data)
> > +{
> > +	struct mv_xor_v2_device *xor_dev = (struct mv_xor_v2_device *) data;
> > +	int pending_ptr, num_of_pending, i;
> > +	struct mv_xor_v2_descriptor *next_pending_hw_desc = NULL;
> > +	struct mv_xor_v2_sw_desc *next_pending_sw_desc = NULL;
> > +
> > +	dev_dbg(xor_dev->dmadev.dev, "%s %d\n", __func__, __LINE__);
> > +
> > +	/* get thepending descriptors parameters */
> 
> space after the pls

Absolutely.

> > +static struct platform_driver mv_xor_v2_driver = {
> > +	.probe		= mv_xor_v2_probe,
> > +	.remove		= mv_xor_v2_remove,
> > +	.driver		= {
> > +		.owner	= THIS_MODULE,
> 
> I dont think we need this

This was already fixed in v2:

  http://lists.infradead.org/pipermail/linux-arm-kernel/2016-February/409404.html

Thanks for the review! There are a few questions from you in the above
discussion, I'll wait for your feedback to submit a v3 that includes
your suggested fixes.

Thanks again!

Thomas
Rob Herring Feb. 22, 2016, 7:58 p.m. UTC | #11
On Mon, Feb 22, 2016 at 3:16 AM, Thomas Petazzoni
<thomas.petazzoni@free-electrons.com> wrote:
> Hello Vinod,
>
> On Mon, 22 Feb 2016 08:57:30 +0530, Vinod Koul wrote:
>> On Mon, Feb 15, 2016 at 08:58:03AM +0100, Thomas Petazzoni wrote:
>> > diff --git a/Documentation/devicetree/bindings/dma/mv-xor-v2.txt b/Documentation/devicetree/bindings/dma/mv-xor-v2.txt
>> > new file mode 100644
>> > index 0000000..0a03dcf
>> > --- /dev/null
>> > +++ b/Documentation/devicetree/bindings/dma/mv-xor-v2.txt
>> > @@ -0,0 +1,19 @@
>> > +* Marvell XOR v2 engines
>> > +
>> > +Required properties:
>> > +- compatible: Should be "marvell,mv-xor-v2"
>> > +- reg: Should contain registers location and length (two sets)
>> > +    the first set is the DMA registers
>> > +    the second set is the global registers
>> > +- msi-parent: Phandle to the MSI-capable interrupt controller used for
>> > +  interrupts.
>> > +
>> > +Example:
>> > +
>> > +   xor0@400000 {
>> > +           compatible = "marvell,mv-xor-v2";
>> > +           reg = <0x400000 0x1000>,
>> > +                 <0x410000 0x1000>;
>> > +           msi-parent = <&gic_v2m0>;
>> > +           dma-coherent;
>> > +   };
>>
>> This should be a separate patch :)
>
> This is really a personal preference, and different maintainers have
> different opinions on the matter. However, if your preference is to
> have the DT binding documentation as a separate patch, then I'll split
> it, no problem :)

Read step 1 in Documentation/devicetree/bindings/submitting-patches.txt.

Rob
--
To unsubscribe from this list: send the line "unsubscribe dmaengine" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rob Herring (Arm) Feb. 22, 2016, 8:02 p.m. UTC | #12
On Mon, Feb 22, 2016 at 1:21 AM, Thomas Petazzoni
<thomas.petazzoni@free-electrons.com> wrote:
> Rob,
>
> On Sun, 21 Feb 2016 20:53:49 -0600, Rob Herring wrote:
>
>> > +Required properties:
>> > +- compatible: Should be "marvell,mv-xor-v2"
>>
>> and an SoC specific compatible please.
>
> So you mean *both* marvell,mv-xor-v2 and a SoC specific compatible?

Yes. Well, really I'd like to see only SoC specific compatibles, but
everyone likes to have their generic ones for matching since it's
always "the same hardware".

Rob
--
To unsubscribe from this list: send the line "unsubscribe dmaengine" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vinod Koul Feb. 23, 2016, 3:02 a.m. UTC | #13
On Mon, Feb 22, 2016 at 10:16:21AM +0100, Thomas Petazzoni wrote:

> > So why should it depend on how kernel is configured?
> 
> Because using upper_32_bits() on a 32-bits type seems wrong? You can do
> a git grep for CONFIG_ARCH_DMA_ADDR_T_64BIT, and see that it is already
> used in drivers/dma/sh/rcar-dmac.c, drivers/gpu/drm/tegra/dc.c or
> drivers/iommu/tegra-smmu.c.
> 
> drivers/dma/sh/rcar-dmac.c uses it for exactly the same reason as me:
> they need to know if dma_addr_t is 32 bits or 64 bits, and if it's 64
> bits, then they need to take the upper 32 bits and feed them to some
> register.

So is the hardware capable of 32bit of 64bit?


> > You don't want to check the source of interrupt?
> 
> To protect against what? An improper MSI message that ends up firing
> this interrupt for now reason? But only kernel space stuff (or
> privileged code in user-space) can write to the MSI triggering
> registers, so we should be able to trust that such an improper MSI
> message will not arrive.

Your device might go bonkers or some erranious programming does cause
interrupts. Its better to be safe than sorry :)

> 
> That being said, I can add a read of the pending descriptors number,
> and if it's 0, then bail out with IRQ_NONE. Is that what you were
> thinking about?

Sounds good


> > why BUG_ON, is the system unusable after this?
> 
> Well, the XOR engine cannot do XOR with 0 source buffers, or with more
> than MV_XOR_V2_CMD_LINE_NUM_MAX_D_BUF.
> 
> But I see that I'm already setting max_xor in struct dma_device, so one
> can assume that ->prep_dma_xor() will not be called with src_cnt == or
> src_cnt > max_xor, so this BUG_ON can be removed.

Okay, you should return NULL to caller if that is the case
Thomas Petazzoni June 15, 2016, 2:08 p.m. UTC | #14
Hello,

Sorry for the long delay, I've just sent a v3 of this patch series. I'm
commenting below the comments you made during your previous review.

On Mon, 22 Feb 2016 08:57:30 +0530, Vinod Koul wrote:

> > +	xor0@400000 {
> > +		compatible = "marvell,mv-xor-v2";
> > +		reg = <0x400000 0x1000>,
> > +		      <0x410000 0x1000>;
> > +		msi-parent = <&gic_v2m0>;
> > +		dma-coherent;
> > +	};  
> 
> This should be a separate patch :)

Fixed in v3.

> > +#define DMA_DESQ_STOP_OFF	0x800
> > +#define DMA_DESQ_DEALLOC_OFF	0x804
> > +#define DMA_DESQ_ADD_OFF	0x808  
> 
> Please namespace these and others. Something like MRVL_XOR_XXX or anyother
> patter you like would be fine...

Fixed in v3.

> > +/* descriptors queue size */
> > +#define MV_XOR_V2_MAX_DESC_NUM	1024  
> 
> Is this hardware defined?

No, so I've changed this to MV_XOR_V2_DESC_NUM, and added a comment to
explain what are the hardware limits (they depend on the descriptor
size).

> > +	/*
> > +	 * fill the buffer's addresses to the descriptor
> > +	 * The format of the buffers address for 2 sequential buffers X and X+1:  
> 
> space around + please.

Fixed in v3.

> > +		/* Set them if we have a 64 bits DMA address */
> > +		if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT))
> > +			desc->data_buff_addr[arr_index + 2] |=
> > +				upper_32_bits(src) & 0xFFFF;  
> 
> So why should it depend on how kernel is configured?

Since the driver now depends on ARM64, CONFIG_ARCH_DMA_ADDR_T_64BIT is
always y, so I've dropped those conditions.

> > + * Return the next available index in the DESQ.
> > + */
> > +static inline int mv_xor_v2_get_desq_write_ptr(struct mv_xor_v2_device *xor_dev)  
> 
> Pls wrap with 80 chars wherever it is reasonable

Done in v3.

> > +/*
> > + * Allocate resources for a channel
> > + */
> > +static int mv_xor_v2_alloc_chan_resources(struct dma_chan *chan)
> > +{
> > +	/* nothing to be done here */
> > +	return 0;
> > +}  
> 
> No need for empty alloc

Done in v3.

> > +
> > +/*
> > + * Free resources of a channel
> > + */
> > +void mv_xor_v2_free_chan_resources(struct dma_chan *chan)
> > +{
> > +	/* nothing to be done here */
> > +}  
> 
> and free as well :)

Ditto.

> > +static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data)
> > +{
> > +	struct mv_xor_v2_device *xor_dev = data;
> > +
> > +	/*
> > +	 * Update IMSG threshold, to disable new IMSG interrupts until
> > +	 * end of the tasklet
> > +	 */
> > +	mv_xor_v2_set_imsg_thrd(xor_dev, MV_XOR_V2_MAX_DESC_NUM);  
> 
> You don't want to check the source of interrupt?

As we discussed, I've added a check that the number of pending
descriptors to process is not 0.

> > +	/* lock enqueue DESCQ */
> > +	spin_lock_bh(&xor_dev->push_lock);  
> 
> Why not a generic channel lock which is used in submit, issue_pending and
> tasklet. What is the reason for opting different locks?

I've changed to use a single spinlock per channel.

> > +	/* get the next available slot in the DESQ */
> > +	desq_ptr = mv_xor_v2_get_desq_write_ptr(xor_dev);
> > +
> > +	/* copy the HW descriptor from the SW descriptor to the DESQ */
> > +	dest_hw_desc = ((void *)xor_dev->hw_desq_virt +  
> 
> cast to and away from void are not required

Correct, I've simplified that in v3.

> > +			(xor_dev->desc_size * desq_ptr));
> > +
> > +	memcpy(dest_hw_desc, &sw_desc->hw_desc, xor_dev->desc_size);
> > +
> > +	/* update the DMA Engine with the new descriptor */
> > +	mv_xor_v2_add_desc_to_desq(xor_dev, 1);
> > +
> > +	/* unlock enqueue DESCQ */
> > +	spin_unlock_bh(&xor_dev->push_lock);  
> 
> and if IIUC, you are pushing this to HW as well, that is not quite right if
> thats the case. We need to do this in issue_pending

This is probably the only thing that I have not changed. The mv_xor
driver is already using the same strategy, and enqueuing in
issue_pending() would force us to add the request to a temporary linked
list, which would be dequeued in issue_pending(). This is quite a bit
of additional processing, while pushing the new requests directly to
the engine works fine.

> > +static struct dma_async_tx_descriptor *
> > +mv_xor_v2_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
> > +		       unsigned int src_cnt, size_t len, unsigned long flags)
> > +{
> > +	struct mv_xor_v2_sw_desc	*sw_desc;
> > +	struct mv_xor_v2_descriptor	*hw_descriptor;
> > +	struct mv_xor_v2_device		*xor_dev;
> > +	int i;
> > +
> > +	BUG_ON(src_cnt > MV_XOR_V2_CMD_LINE_NUM_MAX_D_BUF || src_cnt < 1);  
> 
> why BUG_ON, is the system unusable after this?

Changed in v3 to regular error checking (i.e. returning NULL).

> > +static enum dma_status mv_xor_v2_tx_status(struct dma_chan *chan,
> > +		dma_cookie_t cookie, struct dma_tx_state *txstate)
> > +{
> > +	/* return the transaction status */
> > +	return dma_cookie_status(chan, cookie, txstate);  
> 
> why not assign this as you status callback?

Done in v3.

> > +static void mv_xor_v2_issue_pending(struct dma_chan *chan)
> > +{
> > +	struct mv_xor_v2_device *xor_dev =
> > +		container_of(chan, struct mv_xor_v2_device, dmachan);
> > +
> > +	/* Activate the channel */
> > +	writel(0, xor_dev->dma_base + DMA_DESQ_STOP_OFF);  
> 
> what if channel is already running?

It will keep running. Writing 0 to this register activates the channel
if not already activated, and if already activated, the engine keeps
running.

> > +static void mv_xor_v2_tasklet(unsigned long data)
> > +{
> > +	struct mv_xor_v2_device *xor_dev = (struct mv_xor_v2_device *) data;
> > +	int pending_ptr, num_of_pending, i;
> > +	struct mv_xor_v2_descriptor *next_pending_hw_desc = NULL;
> > +	struct mv_xor_v2_sw_desc *next_pending_sw_desc = NULL;
> > +
> > +	dev_dbg(xor_dev->dmadev.dev, "%s %d\n", __func__, __LINE__);
> > +
> > +	/* get thepending descriptors parameters */  
> 
> space after the pls

Done in v3.

> 
> > +static struct platform_driver mv_xor_v2_driver = {
> > +	.probe		= mv_xor_v2_probe,
> > +	.remove		= mv_xor_v2_remove,
> > +	.driver		= {
> > +		.owner	= THIS_MODULE,  
> 
> I dont think we need this

Was already fixed in v2, so it's fixed in v3 as well.

Thanks a lot!

Thomas
Vinod Koul June 15, 2016, 4:41 p.m. UTC | #15
On Wed, Jun 15, 2016 at 04:08:37PM +0200, Thomas Petazzoni wrote:
 
> > > +			(xor_dev->desc_size * desq_ptr));
> > > +
> > > +	memcpy(dest_hw_desc, &sw_desc->hw_desc, xor_dev->desc_size);
> > > +
> > > +	/* update the DMA Engine with the new descriptor */
> > > +	mv_xor_v2_add_desc_to_desq(xor_dev, 1);
> > > +
> > > +	/* unlock enqueue DESCQ */
> > > +	spin_unlock_bh(&xor_dev->push_lock);  
> > 
> > and if IIUC, you are pushing this to HW as well, that is not quite right if
> > thats the case. We need to do this in issue_pending
> 
> This is probably the only thing that I have not changed. The mv_xor
> driver is already using the same strategy, and enqueuing in
> issue_pending() would force us to add the request to a temporary linked
> list, which would be dequeued in issue_pending(). This is quite a bit
> of additional processing, while pushing the new requests directly to
> the engine works fine.

Well that is wrong! And patch is welcome for mv_xor as well :)

The DMAengine API mandates that we should submit a descriptor to a queue and
then push them by invoking issue_pending.

The users are also expected to follow this
Thomas Petazzoni June 16, 2016, 12:42 p.m. UTC | #16
Hello,

On Wed, 15 Jun 2016 22:11:31 +0530, Vinod Koul wrote:

> > This is probably the only thing that I have not changed. The mv_xor
> > driver is already using the same strategy, and enqueuing in
> > issue_pending() would force us to add the request to a temporary linked
> > list, which would be dequeued in issue_pending(). This is quite a bit
> > of additional processing, while pushing the new requests directly to
> > the engine works fine.  
> 
> Well that is wrong! And patch is welcome for mv_xor as well :)
> 
> The DMAengine API mandates that we should submit a descriptor to a queue and
> then push them by invoking issue_pending.
> 
> The users are also expected to follow this

I have just sent a v4 that fixes this for the mv_xor_v2 driver. Now, in
->tx_submit() we only fill in the HW descriptors, but do not tell the
engine to process them. It's only at ->issue_pending() time that we
tell the engine that there N new descriptors to process.

I'll have a look at mv_xor a bit later.

Thanks!

Thomas
Vinod Koul June 17, 2016, 2:39 a.m. UTC | #17
On Thu, Jun 16, 2016 at 02:42:24PM +0200, Thomas Petazzoni wrote:
> Hello,
> 
> On Wed, 15 Jun 2016 22:11:31 +0530, Vinod Koul wrote:
> 
> > > This is probably the only thing that I have not changed. The mv_xor
> > > driver is already using the same strategy, and enqueuing in
> > > issue_pending() would force us to add the request to a temporary linked
> > > list, which would be dequeued in issue_pending(). This is quite a bit
> > > of additional processing, while pushing the new requests directly to
> > > the engine works fine.  
> > 
> > Well that is wrong! And patch is welcome for mv_xor as well :)
> > 
> > The DMAengine API mandates that we should submit a descriptor to a queue and
> > then push them by invoking issue_pending.
> > 
> > The users are also expected to follow this
> 
> I have just sent a v4 that fixes this for the mv_xor_v2 driver. Now, in
> ->tx_submit() we only fill in the HW descriptors, but do not tell the
> engine to process them. It's only at ->issue_pending() time that we
> tell the engine that there N new descriptors to process.

Okay that sounds good to me, I will take a look at it..
> 
> I'll have a look at mv_xor a bit later.

Sounds fair enough
diff mbox

Patch

diff --git a/Documentation/devicetree/bindings/dma/mv-xor-v2.txt b/Documentation/devicetree/bindings/dma/mv-xor-v2.txt
new file mode 100644
index 0000000..0a03dcf
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/mv-xor-v2.txt
@@ -0,0 +1,19 @@ 
+* Marvell XOR v2 engines
+
+Required properties:
+- compatible: Should be "marvell,mv-xor-v2"
+- reg: Should contain registers location and length (two sets)
+    the first set is the DMA registers
+    the second set is the global registers
+- msi-parent: Phandle to the MSI-capable interrupt controller used for
+  interrupts.
+
+Example:
+
+	xor0@400000 {
+		compatible = "marvell,mv-xor-v2";
+		reg = <0x400000 0x1000>,
+		      <0x410000 0x1000>;
+		msi-parent = <&gic_v2m0>;
+		dma-coherent;
+	};
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 79b1390..aeaac8b 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -339,6 +339,19 @@  config MV_XOR
 	---help---
 	  Enable support for the Marvell XOR engine.
 
+config MV_XOR_V2
+	bool "Marvell XOR engine version 2 support "
+	select DMA_ENGINE
+	select DMA_ENGINE_RAID
+	select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+	select GENERIC_MSI_IRQ_DOMAIN
+	---help---
+	  Enable support for the Marvell version 2 XOR engine.
+
+	  This engine provides acceleration for copy, XOR and RAID6
+	  operations, and is available on Marvell Armada 7K and 8K
+	  platforms.
+
 config MXS_DMA
 	bool "MXS DMA support"
 	depends on SOC_IMX23 || SOC_IMX28 || SOC_IMX6Q
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 2dd0a067..da8ca73 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -45,6 +45,7 @@  obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o
 obj-$(CONFIG_MOXART_DMA) += moxart-dma.o
 obj-$(CONFIG_MPC512X_DMA) += mpc512x_dma.o
 obj-$(CONFIG_MV_XOR) += mv_xor.o
+obj-$(CONFIG_MV_XOR_V2) += mv_xor_v2.o
 obj-$(CONFIG_MXS_DMA) += mxs-dma.o
 obj-$(CONFIG_MX3_IPU) += ipu/
 obj-$(CONFIG_NBPFAXI_DMA) += nbpfaxi.o
diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c
new file mode 100644
index 0000000..ee73d5c
--- /dev/null
+++ b/drivers/dma/mv_xor_v2.c
@@ -0,0 +1,880 @@ 
+/*
+ * Copyright (C) 2015-2016 Marvell International Ltd.
+
+ * This program is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+
+#include "dmaengine.h"
+
+/* DMA Engine Registers */
+#define DMA_DESQ_BALR_OFF	0x000
+#define DMA_DESQ_BAHR_OFF	0x004
+#define DMA_DESQ_SIZE_OFF	0x008
+#define DMA_DESQ_DONE_OFF	0x00C
+#define   DMA_DESQ_DONE_PENDING_MASK		0x7FFF
+#define   DMA_DESQ_DONE_PENDING_SHIFT		0
+#define   DMA_DESQ_DONE_READ_PTR_MASK		0x1FFF
+#define   DMA_DESQ_DONE_READ_PTR_SHIFT		16
+#define DMA_DESQ_ARATTR_OFF	0x010
+#define   DMA_DESQ_ATTR_CACHE_MASK		0x3F3F
+#define   DMA_DESQ_ATTR_OUTER_SHAREABLE		0x202
+#define   DMA_DESQ_ATTR_CACHEABLE		0x3C3C
+#define DMA_IMSG_CDAT_OFF	0x014
+#define DMA_IMSG_THRD_OFF	0x018
+#define   DMA_IMSG_THRD_MASK			0x7FFF
+#define   DMA_IMSG_THRD_SHIFT			0x0
+#define DMA_DESQ_AWATTR_OFF	0x01C
+  /* Same flags as DMA_DESQ_ARATTR_OFF */
+#define DMA_DESQ_ALLOC_OFF	0x04C
+#define   DMA_DESQ_ALLOC_WRPTR_MASK		0xFFFF
+#define   DMA_DESQ_ALLOC_WRPTR_SHIFT		16
+#define DMA_IMSG_BALR_OFF	0x050
+#define DMA_IMSG_BAHR_OFF	0x054
+#define DMA_DESQ_CTRL_OFF	0x100
+#define	  DMA_DESQ_CTRL_32B			1
+#define   DMA_DESQ_CTRL_128B			7
+#define DMA_DESQ_STOP_OFF	0x800
+#define DMA_DESQ_DEALLOC_OFF	0x804
+#define DMA_DESQ_ADD_OFF	0x808
+
+/* XOR Global registers */
+#define GLOB_BW_CTRL		0x4
+#define   GLOB_BW_CTRL_NUM_OSTD_RD_SHIFT	0
+#define   GLOB_BW_CTRL_NUM_OSTD_RD_VAL		64
+#define   GLOB_BW_CTRL_NUM_OSTD_WR_SHIFT	8
+#define   GLOB_BW_CTRL_NUM_OSTD_WR_VAL		8
+#define   GLOB_BW_CTRL_RD_BURST_LEN_SHIFT	12
+#define   GLOB_BW_CTRL_RD_BURST_LEN_VAL		4
+#define   GLOB_BW_CTRL_WR_BURST_LEN_SHIFT	16
+#define	  GLOB_BW_CTRL_WR_BURST_LEN_VAL		4
+#define GLOB_PAUSE		0x014
+#define   GLOB_PAUSE_AXI_TIME_DIS_VAL		0x8
+#define GLOB_SYS_INT_CAUSE	0x200
+#define GLOB_SYS_INT_MASK	0x204
+#define GLOB_MEM_INT_CAUSE	0x220
+#define GLOB_MEM_INT_MASK	0x224
+
+#define MV_XOR_V2_MIN_DESC_SIZE		32
+#define MV_XOR_V2_EXT_DESC_SIZE		128
+
+#define MV_XOR_V2_DESC_RESERVED_SIZE	12
+#define MV_XOR_V2_DESC_BUFF_D_ADDR_SIZE	12
+
+#define MV_XOR_V2_CMD_LINE_NUM_MAX_D_BUF 8
+
+/* descriptors queue size */
+#define MV_XOR_V2_MAX_DESC_NUM	1024
+
+/**
+ * struct mv_xor_v2_descriptor - DMA HW descriptor
+ * @desc_id: used by S/W and is not affected by H/W.
+ * @flags: error and status flags
+ * @crc32_result: CRC32 calculation result
+ * @desc_ctrl: operation mode and control flags
+ * @buff_size: amount of bytes to be processed
+ * @fill_pattern_src_addr: Fill-Pattern or Source-Address and
+ * AW-Attributes
+ * @data_buff_addr: Source (and might be RAID6 destination)
+ * addresses of data buffers in RAID5 and RAID6
+ * @reserved: reserved
+ */
+struct mv_xor_v2_descriptor {
+	u16 desc_id;
+	u16 flags;
+	u32 crc32_result;
+	u32 desc_ctrl;
+
+	/* Definitions for desc_ctrl */
+#define DESC_NUM_ACTIVE_D_BUF_SHIFT	22
+#define DESC_OP_MODE_SHIFT		28
+#define DESC_OP_MODE_NOP		0	/* Idle operation */
+#define DESC_OP_MODE_MEMCPY		1	/* Pure-DMA operation */
+#define DESC_OP_MODE_MEMSET		2	/* Mem-Fill operation */
+#define DESC_OP_MODE_MEMINIT		3	/* Mem-Init operation */
+#define DESC_OP_MODE_MEM_COMPARE	4	/* Mem-Compare operation */
+#define DESC_OP_MODE_CRC32		5	/* CRC32 calculation */
+#define DESC_OP_MODE_XOR		6	/* RAID5 (XOR) operation */
+#define DESC_OP_MODE_RAID6		7	/* RAID6 P&Q-generation */
+#define DESC_OP_MODE_RAID6_REC		8	/* RAID6 Recovery */
+#define DESC_Q_BUFFER_ENABLE		BIT(16)
+#define DESC_P_BUFFER_ENABLE		BIT(17)
+#define DESC_IOD			BIT(27)
+
+	u32 buff_size;
+	u32 fill_pattern_src_addr[4];
+	u32 data_buff_addr[MV_XOR_V2_DESC_BUFF_D_ADDR_SIZE];
+	u32 reserved[MV_XOR_V2_DESC_RESERVED_SIZE];
+};
+
+/**
+ * struct mv_xor_v2_device - implements a xor device
+ * @sw_ll_lock: serializes enqueue/dequeue operations to the sw
+ * descriptors pool
+ * @push_lock: serializes enqueue operations to the DESCQ
+ * @cookie_lock: serializes of cookies control
+ * @dma_base: memory mapped DMA register base
+ * @glob_base: memory mapped global register base
+ * @irq_tasklet:
+ * @free_sw_desc: linked list of free SW descriptors
+ * @dmadev: dma device
+ * @dmachan: dma channel
+ * @hw_desq: HW descriptors queue
+ * @hw_desq_virt: virtual address of DESCQ
+ * @sw_desq: SW descriptors queue
+ * @desc_size: HW descriptor size
+*/
+struct mv_xor_v2_device {
+	spinlock_t sw_ll_lock;
+	spinlock_t push_lock;
+	spinlock_t cookie_lock;
+	void __iomem *dma_base;
+	void __iomem *glob_base;
+	struct tasklet_struct irq_tasklet;
+	struct list_head free_sw_desc;
+	struct dma_device dmadev;
+	struct dma_chan	dmachan;
+	dma_addr_t hw_desq;
+	struct mv_xor_v2_descriptor *hw_desq_virt;
+	struct mv_xor_v2_sw_desc *sw_desq;
+	int desc_size;
+};
+
+/**
+ * struct mv_xor_v2_sw_desc - implements a xor SW descriptor
+ * @idx: descriptor index
+ * @async_tx: support for the async_tx api
+ * @hw_desc: assosiated HW descriptor
+ * @free_list: node of the free SW descriprots list
+*/
+struct mv_xor_v2_sw_desc {
+	int idx;
+	struct dma_async_tx_descriptor async_tx;
+	struct mv_xor_v2_descriptor hw_desc;
+	struct list_head free_list;
+};
+
+/*
+ * Fill the data buffers to a HW descriptor
+ */
+static void mv_xor_v2_set_data_buffers(struct mv_xor_v2_device *xor_dev,
+					struct mv_xor_v2_descriptor *desc,
+					dma_addr_t src, int index)
+{
+	int arr_index = ((index >> 1) * 3);
+
+	/*
+	 * fill the buffer's addresses to the descriptor
+	 * The format of the buffers address for 2 sequential buffers X and X+1:
+	 *  First word: Buffer-DX-Address-Low[31:0]
+	 *  Second word:Buffer-DX+1-Address-Low[31:0]
+	 *  Third word: DX+1-Buffer-Address-High[47:32] [31:16]
+	 *		DX-Buffer-Address-High[47:32] [15:0]
+	 */
+	if ((index & 0x1) == 0) {
+		desc->data_buff_addr[arr_index] = lower_32_bits(src);
+
+		/* Clear lower 16-bits */
+		desc->data_buff_addr[arr_index + 2] &= ~0xFFFF;
+
+		/* Set them if we have a 64 bits DMA address */
+		if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT))
+			desc->data_buff_addr[arr_index + 2] |=
+				upper_32_bits(src) & 0xFFFF;
+	} else {
+		desc->data_buff_addr[arr_index + 1] =
+			lower_32_bits(src);
+
+		/* Clear upper 16-bits */
+		desc->data_buff_addr[arr_index + 2] &= ~0xFFFF0000;
+
+		/* Set them if we have a 64 bits DMA address */
+		if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT))
+			desc->data_buff_addr[arr_index + 2] |=
+				(upper_32_bits(src) & 0xFFFF) << 16;
+	}
+}
+
+/*
+ * Return the next available index in the DESQ.
+ */
+static inline int mv_xor_v2_get_desq_write_ptr(struct mv_xor_v2_device *xor_dev)
+{
+	/* read the index for the next available descriptor in the DESQ */
+	u32 reg = readl(xor_dev->dma_base + DMA_DESQ_ALLOC_OFF);
+
+	return ((reg >> DMA_DESQ_ALLOC_WRPTR_SHIFT)
+		& DMA_DESQ_ALLOC_WRPTR_MASK);
+}
+
+/*
+ * notify the engine of new descriptors, and update the available index.
+ */
+static void mv_xor_v2_add_desc_to_desq(struct mv_xor_v2_device *xor_dev,
+				       int num_of_desc)
+{
+	/* write the number of new descriptors in the DESQ. */
+	writel(num_of_desc, xor_dev->dma_base + DMA_DESQ_ADD_OFF);
+}
+
+/*
+ * free HW descriptors
+ */
+static void mv_xor_v2_free_desc_from_desq(struct mv_xor_v2_device *xor_dev,
+					  int num_of_desc)
+{
+	/* write the number of new descriptors in the DESQ. */
+	writel(num_of_desc, xor_dev->dma_base + DMA_DESQ_DEALLOC_OFF);
+}
+
+/*
+ * Set descriptor size
+ * Return the HW descriptor size in bytes
+ */
+static int mv_xor_v2_set_desc_size(struct mv_xor_v2_device *xor_dev)
+{
+	writel(DMA_DESQ_CTRL_128B,
+	       xor_dev->dma_base + DMA_DESQ_CTRL_OFF);
+
+	return MV_XOR_V2_EXT_DESC_SIZE;
+}
+
+/*
+ * Allocate resources for a channel
+ */
+static int mv_xor_v2_alloc_chan_resources(struct dma_chan *chan)
+{
+	/* nothing to be done here */
+	return 0;
+}
+
+/*
+ * Free resources of a channel
+ */
+void mv_xor_v2_free_chan_resources(struct dma_chan *chan)
+{
+	/* nothing to be done here */
+}
+
+/*
+ * Set the IMSG threshold
+ */
+static inline
+void mv_xor_v2_set_imsg_thrd(struct mv_xor_v2_device *xor_dev, int thrd_val)
+{
+	u32 reg;
+
+	reg = readl(xor_dev->dma_base + DMA_IMSG_THRD_OFF);
+
+	reg &= (~DMA_IMSG_THRD_MASK << DMA_IMSG_THRD_SHIFT);
+	reg |= (thrd_val << DMA_IMSG_THRD_SHIFT);
+
+	writel(reg, xor_dev->dma_base + DMA_IMSG_THRD_OFF);
+}
+
+static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data)
+{
+	struct mv_xor_v2_device *xor_dev = data;
+
+	/*
+	 * Update IMSG threshold, to disable new IMSG interrupts until
+	 * end of the tasklet
+	 */
+	mv_xor_v2_set_imsg_thrd(xor_dev, MV_XOR_V2_MAX_DESC_NUM);
+
+	/* schedule a tasklet to handle descriptors callbacks */
+	tasklet_schedule(&xor_dev->irq_tasklet);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * submit a descriptor to the DMA engine
+ */
+static dma_cookie_t
+mv_xor_v2_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	int desq_ptr;
+	void *dest_hw_desc;
+	dma_cookie_t cookie;
+	struct mv_xor_v2_sw_desc *sw_desc =
+		container_of(tx, struct mv_xor_v2_sw_desc, async_tx);
+	struct mv_xor_v2_device *xor_dev =
+		container_of(tx->chan, struct mv_xor_v2_device, dmachan);
+
+	dev_dbg(xor_dev->dmadev.dev,
+		"%s sw_desc %p: async_tx %p\n",
+		__func__, sw_desc, &sw_desc->async_tx);
+
+	/* assign coookie */
+	spin_lock_bh(&xor_dev->cookie_lock);
+	cookie = dma_cookie_assign(tx);
+	spin_unlock_bh(&xor_dev->cookie_lock);
+
+	/* lock enqueue DESCQ */
+	spin_lock_bh(&xor_dev->push_lock);
+
+	/* get the next available slot in the DESQ */
+	desq_ptr = mv_xor_v2_get_desq_write_ptr(xor_dev);
+
+	/* copy the HW descriptor from the SW descriptor to the DESQ */
+	dest_hw_desc = ((void *)xor_dev->hw_desq_virt +
+			(xor_dev->desc_size * desq_ptr));
+
+	memcpy(dest_hw_desc, &sw_desc->hw_desc, xor_dev->desc_size);
+
+	/* update the DMA Engine with the new descriptor */
+	mv_xor_v2_add_desc_to_desq(xor_dev, 1);
+
+	/* unlock enqueue DESCQ */
+	spin_unlock_bh(&xor_dev->push_lock);
+
+	return cookie;
+}
+
+/*
+ * Prepare a SW descriptor
+ */
+static struct mv_xor_v2_sw_desc	*
+mv_xor_v2_prep_sw_desc(struct mv_xor_v2_device *xor_dev)
+{
+	struct mv_xor_v2_sw_desc *sw_desc;
+
+	/* Lock the channel */
+	spin_lock_bh(&xor_dev->sw_ll_lock);
+
+	/* get a free SW descriptor from the SW DESQ */
+	sw_desc = list_first_entry(&xor_dev->free_sw_desc,
+				   struct mv_xor_v2_sw_desc, free_list);
+	list_del(&sw_desc->free_list);
+
+	/* Release the channel */
+	spin_unlock_bh(&xor_dev->sw_ll_lock);
+
+	/* set the async tx descriptor */
+	dma_async_tx_descriptor_init(&sw_desc->async_tx, &xor_dev->dmachan);
+	sw_desc->async_tx.tx_submit = mv_xor_v2_tx_submit;
+	async_tx_ack(&sw_desc->async_tx);
+
+	return sw_desc;
+}
+
+/*
+ * Prepare a HW descriptor for a memcpy operation
+ */
+static struct dma_async_tx_descriptor *
+mv_xor_v2_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest,
+			  dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct mv_xor_v2_sw_desc	*sw_desc;
+	struct mv_xor_v2_descriptor	*hw_descriptor;
+	struct mv_xor_v2_device		*xor_dev;
+
+	xor_dev = container_of(chan, struct mv_xor_v2_device, dmachan);
+
+	dev_dbg(xor_dev->dmadev.dev,
+		"%s len: %zu src %pad dest %pad flags: %ld\n",
+		__func__, len, &src, &dest, flags);
+
+	sw_desc = mv_xor_v2_prep_sw_desc(xor_dev);
+
+	sw_desc->async_tx.flags = flags;
+
+	/* set the HW descriptor */
+	hw_descriptor = &sw_desc->hw_desc;
+
+	/* save the SW descriptor ID to restore when operation is done */
+	hw_descriptor->desc_id = sw_desc->idx;
+
+	/* Set the MEMCPY control word */
+	hw_descriptor->desc_ctrl =
+		DESC_OP_MODE_MEMCPY << DESC_OP_MODE_SHIFT;
+
+	if (flags & DMA_PREP_INTERRUPT)
+		hw_descriptor->desc_ctrl |= DESC_IOD;
+
+	/* Set source address */
+	hw_descriptor->fill_pattern_src_addr[0] = lower_32_bits(src);
+
+	if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT))
+		hw_descriptor->fill_pattern_src_addr[1] =
+			upper_32_bits(src) & 0xFFFF;
+	else
+		hw_descriptor->fill_pattern_src_addr[1] = 0;
+
+	/* Set Destination address */
+	hw_descriptor->fill_pattern_src_addr[2] = lower_32_bits(dest);
+
+	if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT))
+		hw_descriptor->fill_pattern_src_addr[3] =
+			upper_32_bits(dest) & 0xFFFF;
+	else
+		hw_descriptor->fill_pattern_src_addr[3] = 0;
+
+	/* Set buffers size */
+	hw_descriptor->buff_size = len;
+
+	/* return the async tx descriptor */
+	return &sw_desc->async_tx;
+}
+
+/*
+ * Prepare a HW descriptor for a XOR operation
+ */
+static struct dma_async_tx_descriptor *
+mv_xor_v2_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+		       unsigned int src_cnt, size_t len, unsigned long flags)
+{
+	struct mv_xor_v2_sw_desc	*sw_desc;
+	struct mv_xor_v2_descriptor	*hw_descriptor;
+	struct mv_xor_v2_device		*xor_dev;
+	int i;
+
+	BUG_ON(src_cnt > MV_XOR_V2_CMD_LINE_NUM_MAX_D_BUF || src_cnt < 1);
+
+	xor_dev = container_of(chan, struct mv_xor_v2_device, dmachan);
+
+	dev_dbg(xor_dev->dmadev.dev,
+		"%s src_cnt: %d len: %zu dest %pad flags: %ld\n",
+		__func__, src_cnt, len, &dest, flags);
+
+	BUG_ON(xor_dev->desc_size != MV_XOR_V2_EXT_DESC_SIZE);
+
+	sw_desc = mv_xor_v2_prep_sw_desc(xor_dev);
+
+	sw_desc->async_tx.flags = flags;
+
+	/* set the HW descriptor */
+	hw_descriptor = &sw_desc->hw_desc;
+
+	/* save the SW descriptor ID to restore when operation is done */
+	hw_descriptor->desc_id = sw_desc->idx;
+
+	/* Set the XOR control word */
+	hw_descriptor->desc_ctrl =
+		DESC_OP_MODE_XOR << DESC_OP_MODE_SHIFT;
+	hw_descriptor->desc_ctrl |= DESC_P_BUFFER_ENABLE;
+
+	if (flags & DMA_PREP_INTERRUPT)
+		hw_descriptor->desc_ctrl |= DESC_IOD;
+
+	/* Set the data buffers */
+	for (i = 0; i < src_cnt; i++)
+		mv_xor_v2_set_data_buffers(xor_dev, hw_descriptor, src[i], i);
+
+	hw_descriptor->desc_ctrl |=
+		src_cnt << DESC_NUM_ACTIVE_D_BUF_SHIFT;
+
+	/* Set Destination address */
+	hw_descriptor->fill_pattern_src_addr[2] = lower_32_bits(dest);
+	if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT))
+		hw_descriptor->fill_pattern_src_addr[3] =
+			upper_32_bits(dest) & 0xFFFF;
+	else
+		hw_descriptor->fill_pattern_src_addr[3] = 0;
+
+	/* Set buffers size */
+	hw_descriptor->buff_size = len;
+
+	/* return the async tx descriptor */
+	return &sw_desc->async_tx;
+}
+
+/*
+ * Prepare a HW descriptor for interrupt operation.
+ */
+static struct dma_async_tx_descriptor *
+mv_xor_v2_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
+{
+	struct mv_xor_v2_sw_desc	*sw_desc;
+	struct mv_xor_v2_descriptor	*hw_descriptor;
+	struct mv_xor_v2_device		*xor_dev;
+
+	xor_dev = container_of(chan, struct mv_xor_v2_device, dmachan);
+
+	sw_desc = mv_xor_v2_prep_sw_desc(xor_dev);
+
+	/* set the HW descriptor */
+	hw_descriptor = &sw_desc->hw_desc;
+
+	/* save the SW descriptor ID to restore when operation is done */
+	hw_descriptor->desc_id = sw_desc->idx;
+
+	/* Set the INTERRUPT control word */
+	hw_descriptor->desc_ctrl =
+		DESC_OP_MODE_NOP << DESC_OP_MODE_SHIFT;
+	hw_descriptor->desc_ctrl |= DESC_IOD;
+
+	/* return the async tx descriptor */
+	return &sw_desc->async_tx;
+}
+
+/*
+ * poll for a transaction completion
+ */
+static enum dma_status mv_xor_v2_tx_status(struct dma_chan *chan,
+		dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	/* return the transaction status */
+	return dma_cookie_status(chan, cookie, txstate);
+}
+
+/*
+ * push pending transactions to hardware
+ */
+static void mv_xor_v2_issue_pending(struct dma_chan *chan)
+{
+	struct mv_xor_v2_device *xor_dev =
+		container_of(chan, struct mv_xor_v2_device, dmachan);
+
+	/* Activate the channel */
+	writel(0, xor_dev->dma_base + DMA_DESQ_STOP_OFF);
+}
+
+static inline
+int mv_xor_v2_get_pending_params(struct mv_xor_v2_device *xor_dev,
+				 int *pending_ptr)
+{
+	u32 reg;
+
+	reg = readl(xor_dev->dma_base + DMA_DESQ_DONE_OFF);
+
+	/* get the next pending descriptor index */
+	*pending_ptr = ((reg >> DMA_DESQ_DONE_READ_PTR_SHIFT) &
+			DMA_DESQ_DONE_READ_PTR_MASK);
+
+	/* get the number of descriptors pending handle */
+	return ((reg >> DMA_DESQ_DONE_PENDING_SHIFT) &
+		DMA_DESQ_DONE_PENDING_MASK);
+}
+
+/*
+ * handle the descriptors after HW process
+ */
+static void mv_xor_v2_tasklet(unsigned long data)
+{
+	struct mv_xor_v2_device *xor_dev = (struct mv_xor_v2_device *) data;
+	int pending_ptr, num_of_pending, i;
+	struct mv_xor_v2_descriptor *next_pending_hw_desc = NULL;
+	struct mv_xor_v2_sw_desc *next_pending_sw_desc = NULL;
+
+	dev_dbg(xor_dev->dmadev.dev, "%s %d\n", __func__, __LINE__);
+
+	/* get thepending descriptors parameters */
+	num_of_pending = mv_xor_v2_get_pending_params(xor_dev, &pending_ptr);
+
+	/* next HW descriptor */
+	next_pending_hw_desc = (struct mv_xor_v2_descriptor *)
+		((void *)xor_dev->hw_desq_virt +
+		 (xor_dev->desc_size * (pending_ptr)));
+
+	/* loop over free descriptors */
+	for (i = 0; i < num_of_pending; i++) {
+
+		if (pending_ptr > MV_XOR_V2_MAX_DESC_NUM)
+			pending_ptr = 0;
+
+		if (next_pending_sw_desc != NULL)
+			next_pending_hw_desc++;
+
+		/* get the SW descriptor related to the HW descriptor */
+		next_pending_sw_desc =
+			&xor_dev->sw_desq[next_pending_hw_desc->desc_id];
+
+		/* call the callback */
+		if (next_pending_sw_desc->async_tx.cookie > 0) {
+			/*
+			 * update the channel's completed cookie - no
+			 * lock is required the IMSG threshold provide
+			 * the locking
+			 */
+			dma_cookie_complete(&next_pending_sw_desc->async_tx);
+
+			if (next_pending_sw_desc->async_tx.callback)
+				next_pending_sw_desc->async_tx.callback(
+				next_pending_sw_desc->async_tx.callback_param);
+
+			dma_descriptor_unmap(&next_pending_sw_desc->async_tx);
+		}
+
+		dma_run_dependencies(&next_pending_sw_desc->async_tx);
+
+		/* Lock the channel */
+		spin_lock_bh(&xor_dev->sw_ll_lock);
+
+		/* add the SW descriptor to the free descriptors list */
+		list_add(&next_pending_sw_desc->free_list,
+			 &xor_dev->free_sw_desc);
+
+		/* Release the channel */
+		spin_unlock_bh(&xor_dev->sw_ll_lock);
+
+		/* increment the next descriptor */
+		pending_ptr++;
+	}
+
+	if (num_of_pending != 0) {
+		/* free the descriptores */
+		mv_xor_v2_free_desc_from_desq(xor_dev, num_of_pending);
+	}
+
+	/* Update IMSG threshold, to enable new IMSG interrupts */
+	mv_xor_v2_set_imsg_thrd(xor_dev, 0);
+}
+
+/*
+ *	Set DMA Interrupt-message (IMSG) parameters
+ */
+static void mv_xor_v2_set_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
+{
+	struct mv_xor_v2_device *xor_dev = dev_get_drvdata(desc->dev);
+
+	writel(msg->address_lo, xor_dev->dma_base + DMA_IMSG_BALR_OFF);
+	writel(msg->address_hi & 0xFFFF, xor_dev->dma_base + DMA_IMSG_BAHR_OFF);
+	writel(msg->data, xor_dev->dma_base + DMA_IMSG_CDAT_OFF);
+}
+
+static int mv_xor_v2_descq_init(struct mv_xor_v2_device *xor_dev)
+{
+	u32 reg;
+
+	/* write the DESQ size to the DMA engine */
+	writel(MV_XOR_V2_MAX_DESC_NUM, xor_dev->dma_base + DMA_DESQ_SIZE_OFF);
+
+	/* write the DESQ address to the DMA enngine*/
+	writel(xor_dev->hw_desq & 0xFFFFFFFF,
+	       xor_dev->dma_base + DMA_DESQ_BALR_OFF);
+	writel((xor_dev->hw_desq & 0xFFFF00000000) >> 32,
+	       xor_dev->dma_base + DMA_DESQ_BAHR_OFF);
+
+	/* enable the DMA engine */
+	writel(0, xor_dev->dma_base + DMA_DESQ_STOP_OFF);
+
+	/*
+	 * This is a temporary solution, until we activate the
+	 * SMMU. Set the attributes for reading & writing data buffers
+	 * & descriptors to:
+	 *
+	 *  - OuterShareable - Snoops will be performed on CPU caches
+	 *  - Enable cacheable - Bufferable, Modifiable, Other Allocate
+	 *    and Allocate
+	 */
+	reg = readl(xor_dev->dma_base + DMA_DESQ_ARATTR_OFF);
+	reg &= ~DMA_DESQ_ATTR_CACHE_MASK;
+	reg |= DMA_DESQ_ATTR_OUTER_SHAREABLE | DMA_DESQ_ATTR_CACHEABLE;
+	writel(reg, xor_dev->dma_base + DMA_DESQ_ARATTR_OFF);
+
+	reg = readl(xor_dev->dma_base + DMA_DESQ_AWATTR_OFF);
+	reg &= ~DMA_DESQ_ATTR_CACHE_MASK;
+	reg |= DMA_DESQ_ATTR_OUTER_SHAREABLE | DMA_DESQ_ATTR_CACHEABLE;
+	writel(reg, xor_dev->dma_base + DMA_DESQ_AWATTR_OFF);
+
+	/* BW CTRL - set values to optimize the XOR performance:
+	 *
+	 *  - Set WrBurstLen & RdBurstLen - the unit will issue
+	 *    maximum of 256B write/read transactions.
+	 * -  Limit the number of outstanding write & read data
+	 *    (OBB/IBB) requests to the maximal value.
+	*/
+	reg  = GLOB_BW_CTRL_NUM_OSTD_RD_VAL  << GLOB_BW_CTRL_NUM_OSTD_RD_SHIFT;
+	reg |= GLOB_BW_CTRL_NUM_OSTD_WR_VAL  << GLOB_BW_CTRL_NUM_OSTD_WR_SHIFT;
+	reg |= GLOB_BW_CTRL_RD_BURST_LEN_VAL << GLOB_BW_CTRL_RD_BURST_LEN_SHIFT;
+	reg |= GLOB_BW_CTRL_WR_BURST_LEN_VAL << GLOB_BW_CTRL_WR_BURST_LEN_SHIFT;
+	writel(reg, xor_dev->glob_base + GLOB_BW_CTRL);
+
+	/* Disable the AXI timer feature */
+	reg = readl(xor_dev->glob_base + GLOB_PAUSE);
+	reg |= GLOB_PAUSE_AXI_TIME_DIS_VAL;
+	writel(reg, xor_dev->glob_base + GLOB_PAUSE);
+
+	return 0;
+}
+
+static int mv_xor_v2_probe(struct platform_device *pdev)
+{
+	struct mv_xor_v2_device *xor_dev;
+	struct resource *res;
+	int i, ret = 0;
+	struct dma_device *dma_dev;
+	struct mv_xor_v2_sw_desc *sw_desc;
+	struct msi_desc *msi_desc;
+
+	dev_notice(&pdev->dev, "Marvell Version 2 XOR driver\n");
+
+	xor_dev = devm_kzalloc(&pdev->dev, sizeof(*xor_dev), GFP_KERNEL);
+	if (!xor_dev)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	xor_dev->dma_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(xor_dev->dma_base))
+		return PTR_ERR(xor_dev->dma_base);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	xor_dev->glob_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(xor_dev->glob_base))
+		return PTR_ERR(xor_dev->glob_base);
+
+	platform_set_drvdata(pdev, xor_dev);
+
+	ret = platform_msi_domain_alloc_irqs(&pdev->dev, 1,
+					     mv_xor_v2_set_msi_msg);
+	if (ret)
+		return ret;
+
+	msi_desc = first_msi_entry(&pdev->dev);
+	if (!msi_desc)
+		goto free_msi_irqs;
+
+	ret = devm_request_irq(&pdev->dev, msi_desc->irq,
+			       mv_xor_v2_interrupt_handler, 0,
+			       dev_name(&pdev->dev), xor_dev);
+	if (ret)
+		goto free_msi_irqs;
+
+	tasklet_init(&xor_dev->irq_tasklet, mv_xor_v2_tasklet,
+		     (unsigned long) xor_dev);
+
+	xor_dev->desc_size = mv_xor_v2_set_desc_size(xor_dev);
+
+	dma_cookie_init(&xor_dev->dmachan);
+
+	/*
+	 * allocate coherent memory for hardware descriptors
+	 * note: writecombine gives slightly better performance, but
+	 * requires that we explicitly flush the writes
+	 */
+	xor_dev->hw_desq_virt =
+		dma_alloc_coherent(&pdev->dev,
+				   xor_dev->desc_size * MV_XOR_V2_MAX_DESC_NUM,
+				   &xor_dev->hw_desq, GFP_KERNEL);
+	if (!xor_dev->hw_desq_virt) {
+		ret = -ENOMEM;
+		goto free_msi_irqs;
+	}
+
+	/* alloc memory for the SW descriptors */
+	xor_dev->sw_desq = devm_kzalloc(&pdev->dev, sizeof(*sw_desc) *
+					MV_XOR_V2_MAX_DESC_NUM, GFP_KERNEL);
+	if (!xor_dev->sw_desq) {
+		ret = -ENOMEM;
+		goto free_hw_desq;
+	}
+
+	/* init the channel locks */
+	spin_lock_init(&xor_dev->sw_ll_lock);
+	spin_lock_init(&xor_dev->push_lock);
+	spin_lock_init(&xor_dev->cookie_lock);
+
+	/* init the free SW descriptors list */
+	INIT_LIST_HEAD(&xor_dev->free_sw_desc);
+
+	/* add all SW descriptors to the free list */
+	for (i = 0; i < MV_XOR_V2_MAX_DESC_NUM; i++) {
+		xor_dev->sw_desq[i].idx = i;
+		list_add(&xor_dev->sw_desq[i].free_list,
+			 &xor_dev->free_sw_desc);
+	}
+
+	dma_dev = &xor_dev->dmadev;
+
+	/* set DMA capabilities */
+	dma_cap_zero(dma_dev->cap_mask);
+	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+	dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+	dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask);
+
+	/* init dma link list */
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	/* set base routines */
+	dma_dev->device_alloc_chan_resources =
+		mv_xor_v2_alloc_chan_resources;
+	dma_dev->device_free_chan_resources =
+		mv_xor_v2_free_chan_resources;
+	dma_dev->device_tx_status = mv_xor_v2_tx_status;
+	dma_dev->device_issue_pending = mv_xor_v2_issue_pending;
+	dma_dev->dev = &pdev->dev;
+
+	dma_dev->device_prep_dma_memcpy = mv_xor_v2_prep_dma_memcpy;
+	dma_dev->device_prep_dma_interrupt = mv_xor_v2_prep_dma_interrupt;
+	dma_dev->max_xor = 8;
+	dma_dev->device_prep_dma_xor = mv_xor_v2_prep_dma_xor;
+
+	xor_dev->dmachan.device = dma_dev;
+
+	list_add_tail(&xor_dev->dmachan.device_node,
+		      &dma_dev->channels);
+
+	mv_xor_v2_descq_init(xor_dev);
+
+	ret = dma_async_device_register(dma_dev);
+	if (ret)
+		goto free_hw_desq;
+
+	return 0;
+
+free_hw_desq:
+	dma_free_coherent(&pdev->dev,
+			  xor_dev->desc_size * MV_XOR_V2_MAX_DESC_NUM,
+			  xor_dev->hw_desq_virt, xor_dev->hw_desq);
+free_msi_irqs:
+	platform_msi_domain_free_irqs(&pdev->dev);
+	return ret;
+}
+
+static int mv_xor_v2_remove(struct platform_device *pdev)
+{
+	struct mv_xor_v2_device *xor_dev = platform_get_drvdata(pdev);
+
+	dma_async_device_unregister(&xor_dev->dmadev);
+
+	dma_free_coherent(&pdev->dev,
+			  xor_dev->desc_size * MV_XOR_V2_MAX_DESC_NUM,
+			  xor_dev->hw_desq_virt, xor_dev->hw_desq);
+
+	platform_msi_domain_free_irqs(&pdev->dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id mv_xor_v2_dt_ids[] = {
+	{ .compatible = "marvell,mv-xor-v2", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mv_xor_v2_dt_ids);
+#endif
+
+static struct platform_driver mv_xor_v2_driver = {
+	.probe		= mv_xor_v2_probe,
+	.remove		= mv_xor_v2_remove,
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= "mv_xor_v2",
+		.of_match_table = of_match_ptr(mv_xor_v2_dt_ids),
+	},
+};
+
+module_platform_driver(mv_xor_v2_driver);
+
+MODULE_DESCRIPTION("DMA engine driver for Marvell's Version 2 of XOR engine");
+MODULE_LICENSE("GPL");
+