diff mbox series

[v4,5/7] riscv: mm: dma-noncoherent: Pass direction and operation to ALT_CMO_OP()

Message ID 20221124172207.153718-6-prabhakar.mahadev-lad.rj@bp.renesas.com (mailing list archive)
State Superseded
Delegated to: Palmer Dabbelt
Headers show
Series AX45MP: Add support to non-coherent DMA | expand

Checks

Context Check Description
conchuod/patch_count success Link
conchuod/cover_letter success Series has a cover letter
conchuod/tree_selection success Guessed tree name to be for-next
conchuod/fixes_present success Fixes tag not required for -next series
conchuod/verify_signedoff success Signed-off-by tag matches author and committer
conchuod/kdoc success Errors and warnings before: 0 this patch: 0
conchuod/module_param success Was 0 now: 0
conchuod/build_rv32_defconfig success Build OK
conchuod/build_warn_rv64 fail Errors and warnings before: 0 this patch: 0
conchuod/dtb_warn_rv64 success Errors and warnings before: 0 this patch: 0
conchuod/header_inline success No static functions without inline keyword in header files
conchuod/checkpatch fail ERROR: Macros with complex values should be enclosed in parentheses
conchuod/source_inline success Was 0 now: 0
conchuod/build_rv64_nommu_k210_defconfig success Build OK
conchuod/verify_fixes success No Fixes tag
conchuod/build_rv64_nommu_virt_defconfig success Build OK

Commit Message

Lad, Prabhakar Nov. 24, 2022, 5:22 p.m. UTC
From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>

Pass direction and operation to ALT_CMO_OP() macro.

This is in preparation for adding errata for the Andes CPU core.

Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
---
RFC v3 -> v4
* New patch
---
 arch/riscv/include/asm/cacheflush.h  |  4 ++++
 arch/riscv/include/asm/errata_list.h |  8 ++++++--
 arch/riscv/mm/dma-noncoherent.c      | 15 ++++++++++-----
 3 files changed, 20 insertions(+), 7 deletions(-)

Comments

Heiko Stübner Nov. 24, 2022, 6:29 p.m. UTC | #1
Am Donnerstag, 24. November 2022, 18:22:05 CET schrieb Prabhakar:
> From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
> 
> Pass direction and operation to ALT_CMO_OP() macro.
> 
> This is in preparation for adding errata for the Andes CPU core.

can you provide more explanation why that is necessary please?
I guess you want to use different cache operations for some cases?


Thanks
Heiko

> Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
> ---
> RFC v3 -> v4
> * New patch
> ---
>  arch/riscv/include/asm/cacheflush.h  |  4 ++++
>  arch/riscv/include/asm/errata_list.h |  8 ++++++--
>  arch/riscv/mm/dma-noncoherent.c      | 15 ++++++++++-----
>  3 files changed, 20 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
> index f6fbe7042f1c..4a04d1be7c67 100644
> --- a/arch/riscv/include/asm/cacheflush.h
> +++ b/arch/riscv/include/asm/cacheflush.h
> @@ -8,6 +8,10 @@
>  
>  #include <linux/mm.h>
>  
> +#define NON_COHERENT_SYNC_DMA_FOR_DEVICE	0
> +#define NON_COHERENT_SYNC_DMA_FOR_CPU		1
> +#define NON_COHERENT_DMA_PREP			2
> +
>  static inline void local_flush_icache_all(void)
>  {
>  	asm volatile ("fence.i" ::: "memory");
> diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> index 2ba7e6e74540..48e899a8e7a9 100644
> --- a/arch/riscv/include/asm/errata_list.h
> +++ b/arch/riscv/include/asm/errata_list.h
> @@ -124,7 +124,7 @@ asm volatile(ALTERNATIVE(						\
>  #define THEAD_flush_A0	".long 0x0275000b"
>  #define THEAD_SYNC_S	".long 0x0190000b"
>  
> -#define ALT_CMO_OP(_op, _start, _size, _cachesize)			\
> +#define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops)		\
>  asm volatile(ALTERNATIVE_2(						\
>  	__nops(6),							\
>  	"mv a0, %1\n\t"							\
> @@ -146,7 +146,11 @@ asm volatile(ALTERNATIVE_2(						\
>  			ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO)	\
>  	: : "r"(_cachesize),						\
>  	    "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)),	\
> -	    "r"((unsigned long)(_start) + (_size))			\
> +	    "r"((unsigned long)(_start) + (_size)),			\
> +	    "r"((unsigned long)(_start)),				\
> +	    "r"((unsigned long)(_size)),				\
> +	    "r"((unsigned long)(_dir)),					\
> +	    "r"((unsigned long)(_ops))					\
>  	: "a0")
>  
>  #define THEAD_C9XX_RV_IRQ_PMU			17
> diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c
> index d919efab6eba..e2b82034f504 100644
> --- a/arch/riscv/mm/dma-noncoherent.c
> +++ b/arch/riscv/mm/dma-noncoherent.c
> @@ -19,13 +19,16 @@ void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
>  
>  	switch (dir) {
>  	case DMA_TO_DEVICE:
> -		ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
> +		ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size,
> +			   dir, NON_COHERENT_SYNC_DMA_FOR_DEVICE);
>  		break;
>  	case DMA_FROM_DEVICE:
> -		ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
> +		ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size,
> +			   dir, NON_COHERENT_SYNC_DMA_FOR_DEVICE);
>  		break;
>  	case DMA_BIDIRECTIONAL:
> -		ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
> +		ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size,
> +			   dir, NON_COHERENT_SYNC_DMA_FOR_DEVICE);
>  		break;
>  	default:
>  		break;
> @@ -42,7 +45,8 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
>  		break;
>  	case DMA_FROM_DEVICE:
>  	case DMA_BIDIRECTIONAL:
> -		ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
> +		ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size,
> +			   dir, NON_COHERENT_SYNC_DMA_FOR_CPU);
>  		break;
>  	default:
>  		break;
> @@ -53,7 +57,8 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
>  {
>  	void *flush_addr = page_address(page);
>  
> -	ALT_CMO_OP(flush, flush_addr, size, riscv_cbom_block_size);
> +	ALT_CMO_OP(flush, flush_addr, size, riscv_cbom_block_size,
> +		   0, NON_COHERENT_DMA_PREP);
>  }
>  
>  void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
>
Lad, Prabhakar Nov. 24, 2022, 7:18 p.m. UTC | #2
Hi Heiko,

Thank you for the review.

On Thu, Nov 24, 2022 at 6:29 PM Heiko Stübner <heiko@sntech.de> wrote:
>
> Am Donnerstag, 24. November 2022, 18:22:05 CET schrieb Prabhakar:
> > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
> >
> > Pass direction and operation to ALT_CMO_OP() macro.
> >
> > This is in preparation for adding errata for the Andes CPU core.
>
> can you provide more explanation why that is necessary please?
> I guess you want to use different cache operations for some cases?
>
Yes basically to call different cache operations based on the dir and
operations (and also this allows to export just one function to handle
the errata). I'll update the commit message in the next version.

Cheers,
Prabhakar
Samuel Holland Nov. 25, 2022, 6:49 p.m. UTC | #3
On 11/24/22 13:18, Lad, Prabhakar wrote:
> Hi Heiko,
> 
> Thank you for the review.
> 
> On Thu, Nov 24, 2022 at 6:29 PM Heiko Stübner <heiko@sntech.de> wrote:
>>
>> Am Donnerstag, 24. November 2022, 18:22:05 CET schrieb Prabhakar:
>>> From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
>>>
>>> Pass direction and operation to ALT_CMO_OP() macro.
>>>
>>> This is in preparation for adding errata for the Andes CPU core.
>>
>> can you provide more explanation why that is necessary please?
>> I guess you want to use different cache operations for some cases?
>>
> Yes basically to call different cache operations based on the dir and
> operations (and also this allows to export just one function to handle
> the errata). I'll update the commit message in the next version.

This makes things less efficient, because it requires more instructions
and registers inside the alternative section, and your function
duplicates the logic from arch_sync_dma_for_device(). The alternative is
already passed the operation (clean/flush/invalidate) as a token, so you
can construct the function name with token pasting.

Regards,
Samuel
Lad, Prabhakar Nov. 25, 2022, 8:53 p.m. UTC | #4
Hi Samuel,

Thank you for the review.

On Fri, Nov 25, 2022 at 6:49 PM Samuel Holland <samuel@sholland.org> wrote:
>
> On 11/24/22 13:18, Lad, Prabhakar wrote:
> > Hi Heiko,
> >
> > Thank you for the review.
> >
> > On Thu, Nov 24, 2022 at 6:29 PM Heiko Stübner <heiko@sntech.de> wrote:
> >>
> >> Am Donnerstag, 24. November 2022, 18:22:05 CET schrieb Prabhakar:
> >>> From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
> >>>
> >>> Pass direction and operation to ALT_CMO_OP() macro.
> >>>
> >>> This is in preparation for adding errata for the Andes CPU core.
> >>
> >> can you provide more explanation why that is necessary please?
> >> I guess you want to use different cache operations for some cases?
> >>
> > Yes basically to call different cache operations based on the dir and
> > operations (and also this allows to export just one function to handle
> > the errata). I'll update the commit message in the next version.
>
> This makes things less efficient, because it requires more instructions
> and registers inside the alternative section, and your function
> duplicates the logic from arch_sync_dma_for_device(). The alternative is
> already passed the operation (clean/flush/invalidate) as a token, so you
> can construct the function name with token pasting.
>
I did think about it but that didn't help for example in the
arch_dma_prep_coherent() we are calling flush token, but on RZ/Five
for arch_dma_prep_coherent() we have to do nothing.

Cheers,
Prabhakar
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index f6fbe7042f1c..4a04d1be7c67 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -8,6 +8,10 @@ 
 
 #include <linux/mm.h>
 
+#define NON_COHERENT_SYNC_DMA_FOR_DEVICE	0
+#define NON_COHERENT_SYNC_DMA_FOR_CPU		1
+#define NON_COHERENT_DMA_PREP			2
+
 static inline void local_flush_icache_all(void)
 {
 	asm volatile ("fence.i" ::: "memory");
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index 2ba7e6e74540..48e899a8e7a9 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -124,7 +124,7 @@  asm volatile(ALTERNATIVE(						\
 #define THEAD_flush_A0	".long 0x0275000b"
 #define THEAD_SYNC_S	".long 0x0190000b"
 
-#define ALT_CMO_OP(_op, _start, _size, _cachesize)			\
+#define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops)		\
 asm volatile(ALTERNATIVE_2(						\
 	__nops(6),							\
 	"mv a0, %1\n\t"							\
@@ -146,7 +146,11 @@  asm volatile(ALTERNATIVE_2(						\
 			ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO)	\
 	: : "r"(_cachesize),						\
 	    "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)),	\
-	    "r"((unsigned long)(_start) + (_size))			\
+	    "r"((unsigned long)(_start) + (_size)),			\
+	    "r"((unsigned long)(_start)),				\
+	    "r"((unsigned long)(_size)),				\
+	    "r"((unsigned long)(_dir)),					\
+	    "r"((unsigned long)(_ops))					\
 	: "a0")
 
 #define THEAD_C9XX_RV_IRQ_PMU			17
diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c
index d919efab6eba..e2b82034f504 100644
--- a/arch/riscv/mm/dma-noncoherent.c
+++ b/arch/riscv/mm/dma-noncoherent.c
@@ -19,13 +19,16 @@  void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
 
 	switch (dir) {
 	case DMA_TO_DEVICE:
-		ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
+		ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size,
+			   dir, NON_COHERENT_SYNC_DMA_FOR_DEVICE);
 		break;
 	case DMA_FROM_DEVICE:
-		ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
+		ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size,
+			   dir, NON_COHERENT_SYNC_DMA_FOR_DEVICE);
 		break;
 	case DMA_BIDIRECTIONAL:
-		ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
+		ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size,
+			   dir, NON_COHERENT_SYNC_DMA_FOR_DEVICE);
 		break;
 	default:
 		break;
@@ -42,7 +45,8 @@  void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
 		break;
 	case DMA_FROM_DEVICE:
 	case DMA_BIDIRECTIONAL:
-		ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
+		ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size,
+			   dir, NON_COHERENT_SYNC_DMA_FOR_CPU);
 		break;
 	default:
 		break;
@@ -53,7 +57,8 @@  void arch_dma_prep_coherent(struct page *page, size_t size)
 {
 	void *flush_addr = page_address(page);
 
-	ALT_CMO_OP(flush, flush_addr, size, riscv_cbom_block_size);
+	ALT_CMO_OP(flush, flush_addr, size, riscv_cbom_block_size,
+		   0, NON_COHERENT_DMA_PREP);
 }
 
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,