Message ID | 20221124172207.153718-6-prabhakar.mahadev-lad.rj@bp.renesas.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Palmer Dabbelt |
Headers | show |
Series | AX45MP: Add support to non-coherent DMA | expand |
Context | Check | Description |
---|---|---|
conchuod/patch_count | success | Link |
conchuod/cover_letter | success | Series has a cover letter |
conchuod/tree_selection | success | Guessed tree name to be for-next |
conchuod/fixes_present | success | Fixes tag not required for -next series |
conchuod/verify_signedoff | success | Signed-off-by tag matches author and committer |
conchuod/kdoc | success | Errors and warnings before: 0 this patch: 0 |
conchuod/module_param | success | Was 0 now: 0 |
conchuod/build_rv32_defconfig | success | Build OK |
conchuod/build_warn_rv64 | fail | Errors and warnings before: 0 this patch: 0 |
conchuod/dtb_warn_rv64 | success | Errors and warnings before: 0 this patch: 0 |
conchuod/header_inline | success | No static functions without inline keyword in header files |
conchuod/checkpatch | fail | ERROR: Macros with complex values should be enclosed in parentheses |
conchuod/source_inline | success | Was 0 now: 0 |
conchuod/build_rv64_nommu_k210_defconfig | success | Build OK |
conchuod/verify_fixes | success | No Fixes tag |
conchuod/build_rv64_nommu_virt_defconfig | success | Build OK |
Am Donnerstag, 24. November 2022, 18:22:05 CET schrieb Prabhakar: > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > > Pass direction and operation to ALT_CMO_OP() macro. > > This is in preparation for adding errata for the Andes CPU core. can you provide more explanation why that is necessary please? I guess you want to use different cache operations for some cases? Thanks Heiko > Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > --- > RFC v3 -> v4 > * New patch > --- > arch/riscv/include/asm/cacheflush.h | 4 ++++ > arch/riscv/include/asm/errata_list.h | 8 ++++++-- > arch/riscv/mm/dma-noncoherent.c | 15 ++++++++++----- > 3 files changed, 20 insertions(+), 7 deletions(-) > > diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h > index f6fbe7042f1c..4a04d1be7c67 100644 > --- a/arch/riscv/include/asm/cacheflush.h > +++ b/arch/riscv/include/asm/cacheflush.h > @@ -8,6 +8,10 @@ > > #include <linux/mm.h> > > +#define NON_COHERENT_SYNC_DMA_FOR_DEVICE 0 > +#define NON_COHERENT_SYNC_DMA_FOR_CPU 1 > +#define NON_COHERENT_DMA_PREP 2 > + > static inline void local_flush_icache_all(void) > { > asm volatile ("fence.i" ::: "memory"); > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h > index 2ba7e6e74540..48e899a8e7a9 100644 > --- a/arch/riscv/include/asm/errata_list.h > +++ b/arch/riscv/include/asm/errata_list.h > @@ -124,7 +124,7 @@ asm volatile(ALTERNATIVE( \ > #define THEAD_flush_A0 ".long 0x0275000b" > #define THEAD_SYNC_S ".long 0x0190000b" > > -#define ALT_CMO_OP(_op, _start, _size, _cachesize) \ > +#define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops) \ > asm volatile(ALTERNATIVE_2( \ > __nops(6), \ > "mv a0, %1\n\t" \ > @@ -146,7 +146,11 @@ asm volatile(ALTERNATIVE_2( \ > ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO) \ > : : "r"(_cachesize), \ > "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ > - "r"((unsigned long)(_start) + (_size)) \ > + "r"((unsigned long)(_start) + (_size)), \ > + "r"((unsigned long)(_start)), \ > + "r"((unsigned long)(_size)), \ > + "r"((unsigned long)(_dir)), \ > + "r"((unsigned long)(_ops)) \ > : "a0") > > #define THEAD_C9XX_RV_IRQ_PMU 17 > diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c > index d919efab6eba..e2b82034f504 100644 > --- a/arch/riscv/mm/dma-noncoherent.c > +++ b/arch/riscv/mm/dma-noncoherent.c > @@ -19,13 +19,16 @@ void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, > > switch (dir) { > case DMA_TO_DEVICE: > - ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size); > + ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size, > + dir, NON_COHERENT_SYNC_DMA_FOR_DEVICE); > break; > case DMA_FROM_DEVICE: > - ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size); > + ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size, > + dir, NON_COHERENT_SYNC_DMA_FOR_DEVICE); > break; > case DMA_BIDIRECTIONAL: > - ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size); > + ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size, > + dir, NON_COHERENT_SYNC_DMA_FOR_DEVICE); > break; > default: > break; > @@ -42,7 +45,8 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, > break; > case DMA_FROM_DEVICE: > case DMA_BIDIRECTIONAL: > - ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size); > + ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size, > + dir, NON_COHERENT_SYNC_DMA_FOR_CPU); > break; > default: > break; > @@ -53,7 +57,8 @@ void arch_dma_prep_coherent(struct page *page, size_t size) > { > void *flush_addr = page_address(page); > > - ALT_CMO_OP(flush, flush_addr, size, riscv_cbom_block_size); > + ALT_CMO_OP(flush, flush_addr, size, riscv_cbom_block_size, > + 0, NON_COHERENT_DMA_PREP); > } > > void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, >
Hi Heiko, Thank you for the review. On Thu, Nov 24, 2022 at 6:29 PM Heiko Stübner <heiko@sntech.de> wrote: > > Am Donnerstag, 24. November 2022, 18:22:05 CET schrieb Prabhakar: > > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > > > > Pass direction and operation to ALT_CMO_OP() macro. > > > > This is in preparation for adding errata for the Andes CPU core. > > can you provide more explanation why that is necessary please? > I guess you want to use different cache operations for some cases? > Yes basically to call different cache operations based on the dir and operations (and also this allows to export just one function to handle the errata). I'll update the commit message in the next version. Cheers, Prabhakar
On 11/24/22 13:18, Lad, Prabhakar wrote: > Hi Heiko, > > Thank you for the review. > > On Thu, Nov 24, 2022 at 6:29 PM Heiko Stübner <heiko@sntech.de> wrote: >> >> Am Donnerstag, 24. November 2022, 18:22:05 CET schrieb Prabhakar: >>> From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> >>> >>> Pass direction and operation to ALT_CMO_OP() macro. >>> >>> This is in preparation for adding errata for the Andes CPU core. >> >> can you provide more explanation why that is necessary please? >> I guess you want to use different cache operations for some cases? >> > Yes basically to call different cache operations based on the dir and > operations (and also this allows to export just one function to handle > the errata). I'll update the commit message in the next version. This makes things less efficient, because it requires more instructions and registers inside the alternative section, and your function duplicates the logic from arch_sync_dma_for_device(). The alternative is already passed the operation (clean/flush/invalidate) as a token, so you can construct the function name with token pasting. Regards, Samuel
Hi Samuel, Thank you for the review. On Fri, Nov 25, 2022 at 6:49 PM Samuel Holland <samuel@sholland.org> wrote: > > On 11/24/22 13:18, Lad, Prabhakar wrote: > > Hi Heiko, > > > > Thank you for the review. > > > > On Thu, Nov 24, 2022 at 6:29 PM Heiko Stübner <heiko@sntech.de> wrote: > >> > >> Am Donnerstag, 24. November 2022, 18:22:05 CET schrieb Prabhakar: > >>> From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > >>> > >>> Pass direction and operation to ALT_CMO_OP() macro. > >>> > >>> This is in preparation for adding errata for the Andes CPU core. > >> > >> can you provide more explanation why that is necessary please? > >> I guess you want to use different cache operations for some cases? > >> > > Yes basically to call different cache operations based on the dir and > > operations (and also this allows to export just one function to handle > > the errata). I'll update the commit message in the next version. > > This makes things less efficient, because it requires more instructions > and registers inside the alternative section, and your function > duplicates the logic from arch_sync_dma_for_device(). The alternative is > already passed the operation (clean/flush/invalidate) as a token, so you > can construct the function name with token pasting. > I did think about it but that didn't help for example in the arch_dma_prep_coherent() we are calling flush token, but on RZ/Five for arch_dma_prep_coherent() we have to do nothing. Cheers, Prabhakar
diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index f6fbe7042f1c..4a04d1be7c67 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -8,6 +8,10 @@ #include <linux/mm.h> +#define NON_COHERENT_SYNC_DMA_FOR_DEVICE 0 +#define NON_COHERENT_SYNC_DMA_FOR_CPU 1 +#define NON_COHERENT_DMA_PREP 2 + static inline void local_flush_icache_all(void) { asm volatile ("fence.i" ::: "memory"); diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 2ba7e6e74540..48e899a8e7a9 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -124,7 +124,7 @@ asm volatile(ALTERNATIVE( \ #define THEAD_flush_A0 ".long 0x0275000b" #define THEAD_SYNC_S ".long 0x0190000b" -#define ALT_CMO_OP(_op, _start, _size, _cachesize) \ +#define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops) \ asm volatile(ALTERNATIVE_2( \ __nops(6), \ "mv a0, %1\n\t" \ @@ -146,7 +146,11 @@ asm volatile(ALTERNATIVE_2( \ ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO) \ : : "r"(_cachesize), \ "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ - "r"((unsigned long)(_start) + (_size)) \ + "r"((unsigned long)(_start) + (_size)), \ + "r"((unsigned long)(_start)), \ + "r"((unsigned long)(_size)), \ + "r"((unsigned long)(_dir)), \ + "r"((unsigned long)(_ops)) \ : "a0") #define THEAD_C9XX_RV_IRQ_PMU 17 diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index d919efab6eba..e2b82034f504 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -19,13 +19,16 @@ void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, switch (dir) { case DMA_TO_DEVICE: - ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size); + ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size, + dir, NON_COHERENT_SYNC_DMA_FOR_DEVICE); break; case DMA_FROM_DEVICE: - ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size); + ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size, + dir, NON_COHERENT_SYNC_DMA_FOR_DEVICE); break; case DMA_BIDIRECTIONAL: - ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size); + ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size, + dir, NON_COHERENT_SYNC_DMA_FOR_DEVICE); break; default: break; @@ -42,7 +45,8 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, break; case DMA_FROM_DEVICE: case DMA_BIDIRECTIONAL: - ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size); + ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size, + dir, NON_COHERENT_SYNC_DMA_FOR_CPU); break; default: break; @@ -53,7 +57,8 @@ void arch_dma_prep_coherent(struct page *page, size_t size) { void *flush_addr = page_address(page); - ALT_CMO_OP(flush, flush_addr, size, riscv_cbom_block_size); + ALT_CMO_OP(flush, flush_addr, size, riscv_cbom_block_size, + 0, NON_COHERENT_DMA_PREP); } void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,