Message ID | 20221124172207.153718-8-prabhakar.mahadev-lad.rj@bp.renesas.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Palmer Dabbelt |
Headers | show |
Series | AX45MP: Add support to non-coherent DMA | expand |
Am Donnerstag, 24. November 2022, 18:22:07 CET schrieb Prabhakar: > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > > On the AX45MP core, cache coherency is a specification option so it may > not be supported. In this case DMA will fail. As a workaround, firstly we > allocate a global dma coherent pool from which DMA allocations are taken > and marked as non-cacheable + bufferable using the PMA region as specified > in the device tree. Synchronization callbacks are implemented to > synchronize when doing DMA transactions. > > The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA) > block that allows dynamic adjustment of memory attributes in the runtime. > It contains a configurable amount of PMA entries implemented as CSR > registers to control the attributes of memory locations in interest. > > Below are the memory attributes supported: > * Device, Non-bufferable > * Device, bufferable > * Memory, Non-cacheable, Non-bufferable > * Memory, Non-cacheable, Bufferable > * Memory, Write-back, No-allocate > * Memory, Write-back, Read-allocate > * Memory, Write-back, Write-allocate > * Memory, Write-back, Read and Write-allocate > > This patch adds support to configure the memory attributes of the memory > regions as passed from the l2 cache node and exposes the cache management > ops. > > More info about PMA (section 10.3): > Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf > > Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > --- > RFC v3 -> v4 > * Made use of runtime patching instead of compile time > * Now just exposing single function ax45mp_no_iocp_cmo() for CMO handling > * Added a check to make sure cache line size is always 64 bytes > * Renamed folder rzf -> rzfive > * Improved Kconfig description > * Dropped L2 cache configuration > * Dropped unnecessary casts > * Fixed comments pointed by Geert, apart from use of PTR_ALIGN_XYZ() macros. > --- > arch/riscv/include/asm/cacheflush.h | 8 + > arch/riscv/include/asm/errata_list.h | 32 +- > drivers/soc/renesas/Kconfig | 7 + > drivers/soc/renesas/Makefile | 2 + > drivers/soc/renesas/rzfive/Kconfig | 6 + > drivers/soc/renesas/rzfive/Makefile | 3 + > drivers/soc/renesas/rzfive/ax45mp_cache.c | 415 ++++++++++++++++++++++ > drivers/soc/renesas/rzfive/ax45mp_sbi.h | 29 ++ > 8 files changed, 496 insertions(+), 6 deletions(-) > create mode 100644 drivers/soc/renesas/rzfive/Kconfig > create mode 100644 drivers/soc/renesas/rzfive/Makefile > create mode 100644 drivers/soc/renesas/rzfive/ax45mp_cache.c > create mode 100644 drivers/soc/renesas/rzfive/ax45mp_sbi.h > > diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h > index 4a04d1be7c67..3226f3aceafe 100644 > --- a/arch/riscv/include/asm/cacheflush.h > +++ b/arch/riscv/include/asm/cacheflush.h > @@ -61,6 +61,14 @@ static inline void riscv_noncoherent_supported(void) {} > #define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL > #define SYS_RISCV_FLUSH_ICACHE_ALL (SYS_RISCV_FLUSH_ICACHE_LOCAL) > > +#ifdef CONFIG_AX45MP_L2_CACHE > +extern asmlinkage void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr, > + size_t size, int dir, int ops); > +#else > +inline void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr, > + size_t size, int dir, int ops) {} > +#endif > + > #include <asm-generic/cacheflush.h> > > #endif /* _ASM_RISCV_CACHEFLUSH_H */ > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h > index 48e899a8e7a9..300fed3bfd80 100644 > --- a/arch/riscv/include/asm/errata_list.h > +++ b/arch/riscv/include/asm/errata_list.h > @@ -125,8 +125,8 @@ asm volatile(ALTERNATIVE( \ > #define THEAD_SYNC_S ".long 0x0190000b" > > #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops) \ > -asm volatile(ALTERNATIVE_2( \ > - __nops(6), \ > +asm volatile(ALTERNATIVE_3( \ > + __nops(14), \ > "mv a0, %1\n\t" \ > "j 2f\n\t" \ > "3:\n\t" \ > @@ -134,7 +134,7 @@ asm volatile(ALTERNATIVE_2( \ > "add a0, a0, %0\n\t" \ > "2:\n\t" \ > "bltu a0, %2, 3b\n\t" \ > - "nop", 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ > + __nops(8), 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ > "mv a0, %1\n\t" \ > "j 2f\n\t" \ > "3:\n\t" \ > @@ -142,8 +142,28 @@ asm volatile(ALTERNATIVE_2( \ > "add a0, a0, %0\n\t" \ > "2:\n\t" \ > "bltu a0, %2, 3b\n\t" \ > - THEAD_SYNC_S, THEAD_VENDOR_ID, \ > - ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO) \ > + THEAD_SYNC_S "\n\t" \ > + __nops(8), THEAD_VENDOR_ID, \ > + ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO, \ > + ".option push\n\t\n\t" \ > + ".option norvc\n\t" \ > + ".option norelax\n\t"> \ alternatives already do the norvc + norelax options anyway for old and new instructions, so the .option stuff shouldn't be necessary I guess? > + "addi sp,sp,-16\n\t" \ > + "sd s0,0(sp)\n\t" \ > + "sd ra,8(sp)\n\t" \ > + "addi s0,sp,16\n\t" \ > + "mv a4,%6\n\t" \ > + "mv a3,%5\n\t" \ > + "mv a2,%4\n\t" \ > + "mv a1,%3\n\t" \ > + "mv a0,%0\n\t" \ > + "call ax45mp_no_iocp_cmo\n\t" \ > + "ld ra,8(sp)\n\t" \ > + "ld s0,0(sp)\n\t" \ > + "addi sp,sp,16\n\t" \ > + ".option pop\n\t", \ > + ANDESTECH_VENDOR_ID, ERRATA_ANDESTECH_NO_IOCP, \ > + CONFIG_ERRATA_ANDES_CMO) \ > : : "r"(_cachesize), \ > "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ > "r"((unsigned long)(_start) + (_size)), \ > @@ -151,7 +171,7 @@ asm volatile(ALTERNATIVE_2( \ > "r"((unsigned long)(_size)), \ > "r"((unsigned long)(_dir)), \ > "r"((unsigned long)(_ops)) \ > - : "a0") > + : "a0", "a1", "a2", "a3", "a4", "memory") > > #define THEAD_C9XX_RV_IRQ_PMU 17 > #define THEAD_C9XX_CSR_SCOUNTEROF 0x5c5 [...] > +static int ax45mp_configure_l2_cache(struct device_node *np) > +{ > + int ret; > + > + ret = of_property_read_u32(np, "cache-line-size", &ax45mp_priv->ax45mp_cache_line_size); > + if (ret) { > + pr_err("Failed to get cache-line-size defaulting to 64 bytes\n"); > + ax45mp_priv->ax45mp_cache_line_size = SZ_64; > + } > + > + if (ax45mp_priv->ax45mp_cache_line_size != SZ_64) { > + pr_err("Expected cache-line-size to 64 bytes (found:%u). Defaulting to 64 bytes\n", > + ax45mp_priv->ax45mp_cache_line_size); > + ax45mp_priv->ax45mp_cache_line_size = SZ_64; > + } > + > + ax45mp_priv->ucctl_ok = ax45mp_cpu_cache_controlable(); > + ax45mp_priv->l2cache_enabled = ax45mp_cpu_l2c_ctl_status() & AX45MP_L2_CACHE_CTL_CEN_MASK; > + > + return 0; > +} > + > +static int ax45mp_l2c_probe(struct platform_device *pdev) > +{ > + struct device_node *np = pdev->dev.of_node; > + int ret; > + > + ax45mp_priv = devm_kzalloc(&pdev->dev, sizeof(*ax45mp_priv), GFP_KERNEL); > + if (!ax45mp_priv) > + return -ENOMEM; > + > + ax45mp_priv->l2c_base = devm_of_iomap(&pdev->dev, pdev->dev.of_node, 0, NULL); > + if (!ax45mp_priv->l2c_base) { > + ret = -ENOMEM; > + goto l2c_err; > + } > + > + ret = ax45mp_configure_l2_cache(np); > + if (ret) > + goto l2c_err; > + > + ret = ax45mp_configure_pma_regions(np); > + if (ret) > + goto l2c_err; > + > + static_branch_disable(&ax45mp_l2c_configured); > + > + return 0; > + > +l2c_err: > + devm_kfree(&pdev->dev, ax45mp_priv); > + ax45mp_priv = NULL; > + return ret; > +} > + > +static const struct of_device_id ax45mp_cache_ids[] = { > + { .compatible = "andestech,ax45mp-cache" }, > + { /* sentinel */ } > +}; > + > +static struct platform_driver ax45mp_l2c_driver = { > + .driver = { > + .name = "ax45mp-l2c", > + .of_match_table = ax45mp_cache_ids, > + }, > + .probe = ax45mp_l2c_probe, > +}; > + > +static int __init ax45mp_cache_init(void) > +{ > + static_branch_enable(&ax45mp_l2c_configured); > + return platform_driver_register(&ax45mp_l2c_driver); the ordering is racy I think. I.e. in the function called from the cmo operations (ax45mp*_range) you need to access ax45mp_priv and its line-size element. But when you enable the static branch the driver is not yet registered but even more important, also not probed yet. So I guess the static-branch-enable should be living at the end of ax45mp_l2c_probe() Heiko
Hi Heiko, Thank you for the review. On Thu, Nov 24, 2022 at 6:30 PM Heiko Stübner <heiko@sntech.de> wrote: > > Am Donnerstag, 24. November 2022, 18:22:07 CET schrieb Prabhakar: > > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > > > > On the AX45MP core, cache coherency is a specification option so it may > > not be supported. In this case DMA will fail. As a workaround, firstly we > > allocate a global dma coherent pool from which DMA allocations are taken > > and marked as non-cacheable + bufferable using the PMA region as specified > > in the device tree. Synchronization callbacks are implemented to > > synchronize when doing DMA transactions. > > > > The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA) > > block that allows dynamic adjustment of memory attributes in the runtime. > > It contains a configurable amount of PMA entries implemented as CSR > > registers to control the attributes of memory locations in interest. > > > > Below are the memory attributes supported: > > * Device, Non-bufferable > > * Device, bufferable > > * Memory, Non-cacheable, Non-bufferable > > * Memory, Non-cacheable, Bufferable > > * Memory, Write-back, No-allocate > > * Memory, Write-back, Read-allocate > > * Memory, Write-back, Write-allocate > > * Memory, Write-back, Read and Write-allocate > > > > This patch adds support to configure the memory attributes of the memory > > regions as passed from the l2 cache node and exposes the cache management > > ops. > > > > More info about PMA (section 10.3): > > Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf > > > > Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > > --- > > RFC v3 -> v4 > > * Made use of runtime patching instead of compile time > > * Now just exposing single function ax45mp_no_iocp_cmo() for CMO handling > > * Added a check to make sure cache line size is always 64 bytes > > * Renamed folder rzf -> rzfive > > * Improved Kconfig description > > * Dropped L2 cache configuration > > * Dropped unnecessary casts > > * Fixed comments pointed by Geert, apart from use of PTR_ALIGN_XYZ() macros. > > --- > > arch/riscv/include/asm/cacheflush.h | 8 + > > arch/riscv/include/asm/errata_list.h | 32 +- > > drivers/soc/renesas/Kconfig | 7 + > > drivers/soc/renesas/Makefile | 2 + > > drivers/soc/renesas/rzfive/Kconfig | 6 + > > drivers/soc/renesas/rzfive/Makefile | 3 + > > drivers/soc/renesas/rzfive/ax45mp_cache.c | 415 ++++++++++++++++++++++ > > drivers/soc/renesas/rzfive/ax45mp_sbi.h | 29 ++ > > 8 files changed, 496 insertions(+), 6 deletions(-) > > create mode 100644 drivers/soc/renesas/rzfive/Kconfig > > create mode 100644 drivers/soc/renesas/rzfive/Makefile > > create mode 100644 drivers/soc/renesas/rzfive/ax45mp_cache.c > > create mode 100644 drivers/soc/renesas/rzfive/ax45mp_sbi.h > > > > diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h > > index 4a04d1be7c67..3226f3aceafe 100644 > > --- a/arch/riscv/include/asm/cacheflush.h > > +++ b/arch/riscv/include/asm/cacheflush.h > > @@ -61,6 +61,14 @@ static inline void riscv_noncoherent_supported(void) {} > > #define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL > > #define SYS_RISCV_FLUSH_ICACHE_ALL (SYS_RISCV_FLUSH_ICACHE_LOCAL) > > > > +#ifdef CONFIG_AX45MP_L2_CACHE > > +extern asmlinkage void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr, > > + size_t size, int dir, int ops); > > +#else > > +inline void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr, > > + size_t size, int dir, int ops) {} > > +#endif > > + > > #include <asm-generic/cacheflush.h> > > > > #endif /* _ASM_RISCV_CACHEFLUSH_H */ > > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h > > index 48e899a8e7a9..300fed3bfd80 100644 > > --- a/arch/riscv/include/asm/errata_list.h > > +++ b/arch/riscv/include/asm/errata_list.h > > @@ -125,8 +125,8 @@ asm volatile(ALTERNATIVE( \ > > #define THEAD_SYNC_S ".long 0x0190000b" > > > > #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops) \ > > -asm volatile(ALTERNATIVE_2( \ > > - __nops(6), \ > > +asm volatile(ALTERNATIVE_3( \ > > + __nops(14), \ > > "mv a0, %1\n\t" \ > > "j 2f\n\t" \ > > "3:\n\t" \ > > @@ -134,7 +134,7 @@ asm volatile(ALTERNATIVE_2( \ > > "add a0, a0, %0\n\t" \ > > "2:\n\t" \ > > "bltu a0, %2, 3b\n\t" \ > > - "nop", 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ > > + __nops(8), 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ > > "mv a0, %1\n\t" \ > > "j 2f\n\t" \ > > "3:\n\t" \ > > @@ -142,8 +142,28 @@ asm volatile(ALTERNATIVE_2( \ > > "add a0, a0, %0\n\t" \ > > "2:\n\t" \ > > "bltu a0, %2, 3b\n\t" \ > > - THEAD_SYNC_S, THEAD_VENDOR_ID, \ > > - ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO) \ > > + THEAD_SYNC_S "\n\t" \ > > + __nops(8), THEAD_VENDOR_ID, \ > > + ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO, \ > > + ".option push\n\t\n\t" \ > > + ".option norvc\n\t" \ > > + ".option norelax\n\t"> \ > > alternatives already do the norvc + norelax options anyway for old and new instructions, > so the .option stuff shouldn't be necessary I guess? > I did a quick run with .option stuff and all seems to be OK. I'll do some rigorous testing and get rid of it in the next version. > > > + "addi sp,sp,-16\n\t" \ > > + "sd s0,0(sp)\n\t" \ > > + "sd ra,8(sp)\n\t" \ > > + "addi s0,sp,16\n\t" \ > > + "mv a4,%6\n\t" \ > > + "mv a3,%5\n\t" \ > > + "mv a2,%4\n\t" \ > > + "mv a1,%3\n\t" \ > > + "mv a0,%0\n\t" \ > > + "call ax45mp_no_iocp_cmo\n\t" \ > > + "ld ra,8(sp)\n\t" \ > > + "ld s0,0(sp)\n\t" \ > > + "addi sp,sp,16\n\t" \ > > + ".option pop\n\t", \ > > + ANDESTECH_VENDOR_ID, ERRATA_ANDESTECH_NO_IOCP, \ > > + CONFIG_ERRATA_ANDES_CMO) \ > > : : "r"(_cachesize), \ > > "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ > > "r"((unsigned long)(_start) + (_size)), \ > > @@ -151,7 +171,7 @@ asm volatile(ALTERNATIVE_2( \ > > "r"((unsigned long)(_size)), \ > > "r"((unsigned long)(_dir)), \ > > "r"((unsigned long)(_ops)) \ > > - : "a0") > > + : "a0", "a1", "a2", "a3", "a4", "memory") > > > > #define THEAD_C9XX_RV_IRQ_PMU 17 > > #define THEAD_C9XX_CSR_SCOUNTEROF 0x5c5 <snip> > > +static int ax45mp_l2c_probe(struct platform_device *pdev) > > +{ > > + struct device_node *np = pdev->dev.of_node; > > + int ret; > > + > > + ax45mp_priv = devm_kzalloc(&pdev->dev, sizeof(*ax45mp_priv), GFP_KERNEL); > > + if (!ax45mp_priv) > > + return -ENOMEM; > > + > > + ax45mp_priv->l2c_base = devm_of_iomap(&pdev->dev, pdev->dev.of_node, 0, NULL); > > + if (!ax45mp_priv->l2c_base) { > > + ret = -ENOMEM; > > + goto l2c_err; > > + } > > + > > + ret = ax45mp_configure_l2_cache(np); > > + if (ret) > > + goto l2c_err; > > + > > + ret = ax45mp_configure_pma_regions(np); > > + if (ret) > > + goto l2c_err; > > + > > + static_branch_disable(&ax45mp_l2c_configured); > > + > > + return 0; > > + > > +l2c_err: > > + devm_kfree(&pdev->dev, ax45mp_priv); > > + ax45mp_priv = NULL; > > + return ret; > > +} > > + > > +static const struct of_device_id ax45mp_cache_ids[] = { > > + { .compatible = "andestech,ax45mp-cache" }, > > + { /* sentinel */ } > > +}; > > + > > +static struct platform_driver ax45mp_l2c_driver = { > > + .driver = { > > + .name = "ax45mp-l2c", > > + .of_match_table = ax45mp_cache_ids, > > + }, > > + .probe = ax45mp_l2c_probe, > > +}; > > + > > +static int __init ax45mp_cache_init(void) > > +{ > > + static_branch_enable(&ax45mp_l2c_configured); > > + return platform_driver_register(&ax45mp_l2c_driver); > > the ordering is racy I think. > > I.e. in the function called from the cmo operations (ax45mp*_range) > you need to access ax45mp_priv and its line-size element. > > But when you enable the static branch the driver is not yet registered > but even more important, also not probed yet. > > So I guess the static-branch-enable should be living at the end of > ax45mp_l2c_probe() > Hmm so my understanding is incorrect. static_branch_unlikely() - evaluates to false when static_branch_enable() is called static_branch_unlikely() - evaluates to true when static_branch_disable() is called Is that what you meant? Cheers, Prabhakar
Am Donnerstag, 24. November 2022, 20:56:39 CET schrieb Lad, Prabhakar: > Hi Heiko, > > Thank you for the review. > > On Thu, Nov 24, 2022 at 6:30 PM Heiko Stübner <heiko@sntech.de> wrote: > > > > Am Donnerstag, 24. November 2022, 18:22:07 CET schrieb Prabhakar: > > > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > > > > > > On the AX45MP core, cache coherency is a specification option so it may > > > not be supported. In this case DMA will fail. As a workaround, firstly we > > > allocate a global dma coherent pool from which DMA allocations are taken > > > and marked as non-cacheable + bufferable using the PMA region as specified > > > in the device tree. Synchronization callbacks are implemented to > > > synchronize when doing DMA transactions. > > > > > > The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA) > > > block that allows dynamic adjustment of memory attributes in the runtime. > > > It contains a configurable amount of PMA entries implemented as CSR > > > registers to control the attributes of memory locations in interest. > > > > > > Below are the memory attributes supported: > > > * Device, Non-bufferable > > > * Device, bufferable > > > * Memory, Non-cacheable, Non-bufferable > > > * Memory, Non-cacheable, Bufferable > > > * Memory, Write-back, No-allocate > > > * Memory, Write-back, Read-allocate > > > * Memory, Write-back, Write-allocate > > > * Memory, Write-back, Read and Write-allocate > > > > > > This patch adds support to configure the memory attributes of the memory > > > regions as passed from the l2 cache node and exposes the cache management > > > ops. > > > > > > More info about PMA (section 10.3): > > > Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf > > > > > > Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > > > --- > > > RFC v3 -> v4 > > > * Made use of runtime patching instead of compile time > > > * Now just exposing single function ax45mp_no_iocp_cmo() for CMO handling > > > * Added a check to make sure cache line size is always 64 bytes > > > * Renamed folder rzf -> rzfive > > > * Improved Kconfig description > > > * Dropped L2 cache configuration > > > * Dropped unnecessary casts > > > * Fixed comments pointed by Geert, apart from use of PTR_ALIGN_XYZ() macros. > > > --- > > > arch/riscv/include/asm/cacheflush.h | 8 + > > > arch/riscv/include/asm/errata_list.h | 32 +- > > > drivers/soc/renesas/Kconfig | 7 + > > > drivers/soc/renesas/Makefile | 2 + > > > drivers/soc/renesas/rzfive/Kconfig | 6 + > > > drivers/soc/renesas/rzfive/Makefile | 3 + > > > drivers/soc/renesas/rzfive/ax45mp_cache.c | 415 ++++++++++++++++++++++ > > > drivers/soc/renesas/rzfive/ax45mp_sbi.h | 29 ++ > > > 8 files changed, 496 insertions(+), 6 deletions(-) > > > create mode 100644 drivers/soc/renesas/rzfive/Kconfig > > > create mode 100644 drivers/soc/renesas/rzfive/Makefile > > > create mode 100644 drivers/soc/renesas/rzfive/ax45mp_cache.c > > > create mode 100644 drivers/soc/renesas/rzfive/ax45mp_sbi.h > > > > > > diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h > > > index 4a04d1be7c67..3226f3aceafe 100644 > > > --- a/arch/riscv/include/asm/cacheflush.h > > > +++ b/arch/riscv/include/asm/cacheflush.h > > > @@ -61,6 +61,14 @@ static inline void riscv_noncoherent_supported(void) {} > > > #define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL > > > #define SYS_RISCV_FLUSH_ICACHE_ALL (SYS_RISCV_FLUSH_ICACHE_LOCAL) > > > > > > +#ifdef CONFIG_AX45MP_L2_CACHE > > > +extern asmlinkage void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr, > > > + size_t size, int dir, int ops); > > > +#else > > > +inline void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr, > > > + size_t size, int dir, int ops) {} > > > +#endif > > > + > > > #include <asm-generic/cacheflush.h> > > > > > > #endif /* _ASM_RISCV_CACHEFLUSH_H */ > > > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h > > > index 48e899a8e7a9..300fed3bfd80 100644 > > > --- a/arch/riscv/include/asm/errata_list.h > > > +++ b/arch/riscv/include/asm/errata_list.h > > > @@ -125,8 +125,8 @@ asm volatile(ALTERNATIVE( \ > > > #define THEAD_SYNC_S ".long 0x0190000b" > > > > > > #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops) \ > > > -asm volatile(ALTERNATIVE_2( \ > > > - __nops(6), \ > > > +asm volatile(ALTERNATIVE_3( \ > > > + __nops(14), \ > > > "mv a0, %1\n\t" \ > > > "j 2f\n\t" \ > > > "3:\n\t" \ > > > @@ -134,7 +134,7 @@ asm volatile(ALTERNATIVE_2( \ > > > "add a0, a0, %0\n\t" \ > > > "2:\n\t" \ > > > "bltu a0, %2, 3b\n\t" \ > > > - "nop", 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ > > > + __nops(8), 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ > > > "mv a0, %1\n\t" \ > > > "j 2f\n\t" \ > > > "3:\n\t" \ > > > @@ -142,8 +142,28 @@ asm volatile(ALTERNATIVE_2( \ > > > "add a0, a0, %0\n\t" \ > > > "2:\n\t" \ > > > "bltu a0, %2, 3b\n\t" \ > > > - THEAD_SYNC_S, THEAD_VENDOR_ID, \ > > > - ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO) \ > > > + THEAD_SYNC_S "\n\t" \ > > > + __nops(8), THEAD_VENDOR_ID, \ > > > + ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO, \ > > > + ".option push\n\t\n\t" \ > > > + ".option norvc\n\t" \ > > > + ".option norelax\n\t"> \ > > > > alternatives already do the norvc + norelax options anyway for old and new instructions, > > so the .option stuff shouldn't be necessary I guess? > > > I did a quick run with .option stuff and all seems to be OK. I'll do > some rigorous testing and get rid of it in the next version. > > > > > + "addi sp,sp,-16\n\t" \ > > > + "sd s0,0(sp)\n\t" \ > > > + "sd ra,8(sp)\n\t" \ > > > + "addi s0,sp,16\n\t" \ > > > + "mv a4,%6\n\t" \ > > > + "mv a3,%5\n\t" \ > > > + "mv a2,%4\n\t" \ > > > + "mv a1,%3\n\t" \ > > > + "mv a0,%0\n\t" \ > > > + "call ax45mp_no_iocp_cmo\n\t" \ > > > + "ld ra,8(sp)\n\t" \ > > > + "ld s0,0(sp)\n\t" \ > > > + "addi sp,sp,16\n\t" \ > > > + ".option pop\n\t", \ > > > + ANDESTECH_VENDOR_ID, ERRATA_ANDESTECH_NO_IOCP, \ > > > + CONFIG_ERRATA_ANDES_CMO) \ > > > : : "r"(_cachesize), \ > > > "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ > > > "r"((unsigned long)(_start) + (_size)), \ > > > @@ -151,7 +171,7 @@ asm volatile(ALTERNATIVE_2( \ > > > "r"((unsigned long)(_size)), \ > > > "r"((unsigned long)(_dir)), \ > > > "r"((unsigned long)(_ops)) \ > > > - : "a0") > > > + : "a0", "a1", "a2", "a3", "a4", "memory") > > > > > > #define THEAD_C9XX_RV_IRQ_PMU 17 > > > #define THEAD_C9XX_CSR_SCOUNTEROF 0x5c5 > <snip> > > > +static int ax45mp_l2c_probe(struct platform_device *pdev) > > > +{ > > > + struct device_node *np = pdev->dev.of_node; > > > + int ret; > > > + > > > + ax45mp_priv = devm_kzalloc(&pdev->dev, sizeof(*ax45mp_priv), GFP_KERNEL); > > > + if (!ax45mp_priv) > > > + return -ENOMEM; > > > + > > > + ax45mp_priv->l2c_base = devm_of_iomap(&pdev->dev, pdev->dev.of_node, 0, NULL); > > > + if (!ax45mp_priv->l2c_base) { > > > + ret = -ENOMEM; > > > + goto l2c_err; > > > + } > > > + > > > + ret = ax45mp_configure_l2_cache(np); > > > + if (ret) > > > + goto l2c_err; > > > + > > > + ret = ax45mp_configure_pma_regions(np); > > > + if (ret) > > > + goto l2c_err; > > > + > > > + static_branch_disable(&ax45mp_l2c_configured); > > > + > > > + return 0; > > > + > > > +l2c_err: > > > + devm_kfree(&pdev->dev, ax45mp_priv); > > > + ax45mp_priv = NULL; > > > + return ret; > > > +} > > > + > > > +static const struct of_device_id ax45mp_cache_ids[] = { > > > + { .compatible = "andestech,ax45mp-cache" }, > > > + { /* sentinel */ } > > > +}; > > > + > > > +static struct platform_driver ax45mp_l2c_driver = { > > > + .driver = { > > > + .name = "ax45mp-l2c", > > > + .of_match_table = ax45mp_cache_ids, > > > + }, > > > + .probe = ax45mp_l2c_probe, > > > +}; > > > + > > > +static int __init ax45mp_cache_init(void) > > > +{ > > > + static_branch_enable(&ax45mp_l2c_configured); > > > + return platform_driver_register(&ax45mp_l2c_driver); > > > > the ordering is racy I think. > > > > I.e. in the function called from the cmo operations (ax45mp*_range) > > you need to access ax45mp_priv and its line-size element. > > > > But when you enable the static branch the driver is not yet registered > > but even more important, also not probed yet. > > > > So I guess the static-branch-enable should be living at the end of > > ax45mp_l2c_probe() > > > Hmm so my understanding is incorrect. > > static_branch_unlikely() - evaluates to false when > static_branch_enable() is called > static_branch_unlikely() - evaluates to true when > static_branch_disable() is called > > Is that what you meant? That is an issue as well :-) I.e. static_branch_* will always return true when enabled and false when disabled. The likely / unlikely suffix is a mechanism for runtime performance, i.e. unlikely means that you expect the static-key to be false in _most_ cases, where likely means you expect it to be true in most cases. But also - arch_initcall(ax45mp_cache_init); - does static-branch enable - registers platform_driver - platform_driver probes - kzalloc(priv) ... [1] - priv->line_size from dt at [1] your condition could already be fullfilled but you don't have a cache-line-size yet. Heiko
On Thu, Nov 24, 2022 at 05:22:07PM +0000, Prabhakar wrote: > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > > On the AX45MP core, cache coherency is a specification option so it may How about: "Cache coherency is an option feature of the AX45MP core, so it may not be supported." I keep finding that sentence kinda hard.. > In this case DMA will fail. "The AX45MP predates the standard extensions for cache management, so an alternate approach is required to support non-coherent DMA for SoCs where this feature is not available, such as the Renesas RZ/Five." (you've gotta explain somewhere why this is in drivers/soc/renesas lol) > As a workaround, firstly we How about: " Since the cache management instructions cannot be used, we instead allocate..." > allocate a global dma coherent pool from which DMA allocations are taken > and marked as non-cacheable + bufferable using the PMA region as specified > in the device tree. Synchronization callbacks are implemented to > synchronize when doing DMA transactions. > > The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA) > block that allows dynamic adjustment of memory attributes in the runtime. > It contains a configurable amount of PMA entries implemented as CSR > registers to control the attributes of memory locations in interest. > > Below are the memory attributes supported: > * Device, Non-bufferable > * Device, bufferable > * Memory, Non-cacheable, Non-bufferable > * Memory, Non-cacheable, Bufferable > * Memory, Write-back, No-allocate > * Memory, Write-back, Read-allocate > * Memory, Write-back, Write-allocate > * Memory, Write-back, Read and Write-allocate > > This patch adds support to configure the memory attributes of the memory > regions as passed from the l2 cache node and exposes the cache management > ops. > > More info about PMA (section 10.3): > Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf But yeah, this is basically the sort of stuff that'd be nice to have in the previous patch! > Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > --- > RFC v3 -> v4 > * Made use of runtime patching instead of compile time > * Now just exposing single function ax45mp_no_iocp_cmo() for CMO handling > * Added a check to make sure cache line size is always 64 bytes > * Renamed folder rzf -> rzfive > * Improved Kconfig description > * Dropped L2 cache configuration > * Dropped unnecessary casts > * Fixed comments pointed by Geert, apart from use of PTR_ALIGN_XYZ() macros. > --- > arch/riscv/include/asm/cacheflush.h | 8 + > arch/riscv/include/asm/errata_list.h | 32 +- > drivers/soc/renesas/Kconfig | 7 + > drivers/soc/renesas/Makefile | 2 + > drivers/soc/renesas/rzfive/Kconfig | 6 + > drivers/soc/renesas/rzfive/Makefile | 3 + > drivers/soc/renesas/rzfive/ax45mp_cache.c | 415 ++++++++++++++++++++++ > drivers/soc/renesas/rzfive/ax45mp_sbi.h | 29 ++ > 8 files changed, 496 insertions(+), 6 deletions(-) > create mode 100644 drivers/soc/renesas/rzfive/Kconfig > create mode 100644 drivers/soc/renesas/rzfive/Makefile > create mode 100644 drivers/soc/renesas/rzfive/ax45mp_cache.c > create mode 100644 drivers/soc/renesas/rzfive/ax45mp_sbi.h > diff --git a/drivers/soc/renesas/Kconfig b/drivers/soc/renesas/Kconfig > index 660498252ec5..e7810256c60d 100644 > --- a/drivers/soc/renesas/Kconfig > +++ b/drivers/soc/renesas/Kconfig > @@ -340,9 +340,16 @@ if RISCV > config ARCH_R9A07G043 > bool "RISC-V Platform support for RZ/Five" > select ARCH_RZG2L > + select AX45MP_L2_CACHE > + select DMA_GLOBAL_POOL > + select ERRATA_ANDES > + select ERRATA_ANDES_CMO > + select RISCV_DMA_NONCOHERENT Is this not redundant due to the select by ERRATA_ANDES_CMO? > help > This enables support for the Renesas RZ/Five SoC. > > +source "drivers/soc/renesas/rzfive/Kconfig" > + > endif # RISCV > > config RST_RCAR > diff --git a/drivers/soc/renesas/rzfive/Makefile b/drivers/soc/renesas/rzfive/Makefile > new file mode 100644 > index 000000000000..2012e7fb978d > --- /dev/null > +++ b/drivers/soc/renesas/rzfive/Makefile > @@ -0,0 +1,3 @@ > +# SPDX-License-Identifier: GPL-2.0 > + > +obj-$(CONFIG_AX45MP_L2_CACHE) += ax45mp_cache.o > diff --git a/drivers/soc/renesas/rzfive/ax45mp_cache.c b/drivers/soc/renesas/rzfive/ax45mp_cache.c > new file mode 100644 > index 000000000000..4e0d0545d3af Mainly just whizzing through the driver itself.. > +static int ax45mp_configure_pma_regions(struct device_node *np) > +{ > + const char *propname = "andestech,pma-regions"; > + u32 start, size, flags; > + unsigned int entry_id; > + unsigned int i; > + int count; > + int ret; > + > + count = of_property_count_elems_of_size(np, propname, sizeof(u32) * 3); > + if (count < 0) > + return count; > + > + if (count > AX45MP_MAX_PMA_REGIONS) > + return -EINVAL; > + > + for (i = 0, entry_id = 0 ; entry_id < count ; i += 3, entry_id++) { > + of_property_read_u32_index(np, propname, i, &start); > + of_property_read_u32_index(np, propname, i + 1, &size); > + of_property_read_u32_index(np, propname, i + 2, &flags); > + ret = ax45mp_sbi_set_pma(start, size, flags, entry_id); > + if (!ret) > + pr_err("Failed to setup PMA region 0x%x - 0x%x flags: 0x%x", > + start, start + size, flags); I have to ask - is it okay to just continue here if a RMA region setup fails? > + } > + > + return 0; > +} > + > +static bool ax45mp_cpu_cache_controlable(void) > +{ > + return (((ax45mp_cpu_get_micm_cfg_status() & AX45MP_MICM_CFG_ISZ_MASK) || > + (ax45mp_cpu_get_mdcm_cfg_status() & AX45MP_MDCM_CFG_DSZ_MASK)) && > + (ax45mp_cpu_get_misa_cfg_status() & AX45MP_MISA_20_MASK) && > + (ax45mp_cpu_get_mmsc_cfg_status() & AX45MP_MMSC_CFG_CCTLCSR_MASK) && > + (ax45mp_cpu_get_mcache_ctl_status() & AX45MP_MCACHE_CTL_CCTL_SUEN_MASK)); That's a bit of a mouthful lol! > +static void ax45mp_cpu_dma_inval_range(void *vaddr, size_t size) Not mine to look after so /shrug but this looks like the sort of thing that could do with a comment or two explaining the invalidation process. > +{ > + char cache_buf[2][AX45MP_MAX_CACHE_LINE_SIZE]; > + unsigned long start = (unsigned long)vaddr; > + unsigned long end = start + size; > + unsigned long old_start = start; > + unsigned long old_end = end; > + unsigned long line_size; > + unsigned long flags; > + > + if (static_branch_unlikely(&ax45mp_l2c_configured) && !ax45mp_priv) > + return; > + > + if (unlikely(start == end)) > + return; > + > + line_size = ax45mp_priv->ax45mp_cache_line_size; > + > + memset(&cache_buf, 0x0, sizeof(cache_buf)); > + start = start & (~(line_size - 1)); > + end = ((end + line_size - 1) & (~(line_size - 1))); > + > + local_irq_save(flags); > + if (unlikely(start != old_start)) > + memcpy(&cache_buf[0][0], (void *)start, line_size); > + > + if (unlikely(end != old_end)) > + memcpy(&cache_buf[1][0], (void *)(old_end & (~(line_size - 1))), line_size); > + > + ax45mp_cpu_dcache_inval_range(vaddr, (void *)end, line_size); > + > + if (unlikely(start != old_start)) > + memcpy((void *)start, &cache_buf[0][0], (old_start & (line_size - 1))); > + > + if (unlikely(end != old_end)) > + memcpy((void *)(old_end + 1), > + &cache_buf[1][(old_end & (line_size - 1)) + 1], > + end - old_end - 1); > + > + local_irq_restore(flags); > +} > +static int ax45mp_configure_l2_cache(struct device_node *np) > +{ > + int ret; > + > + ret = of_property_read_u32(np, "cache-line-size", &ax45mp_priv->ax45mp_cache_line_size); > + if (ret) { > + pr_err("Failed to get cache-line-size defaulting to 64 bytes\n"); ^ Looks like you need a comma here... > + ax45mp_priv->ax45mp_cache_line_size = SZ_64; > + } > + > + if (ax45mp_priv->ax45mp_cache_line_size != SZ_64) { > + pr_err("Expected cache-line-size to 64 bytes (found:%u). Defaulting to 64 bytes\n", ^ ...and a "be" here. Would you also benefit from a pr_fmt here since you have no device? Or else you could save the dev to your ax45mp_priv and avail of dev_err here? > + ax45mp_priv->ax45mp_cache_line_size); > + ax45mp_priv->ax45mp_cache_line_size = SZ_64; > + } > + > + ax45mp_priv->ucctl_ok = ax45mp_cpu_cache_controlable(); ^ That function name is a typo, should be called ax45mp_cpu_cache_cache_controllable(). > + ax45mp_priv->l2cache_enabled = ax45mp_cpu_l2c_ctl_status() & AX45MP_L2_CACHE_CTL_CEN_MASK; > + > + return 0; > +} > + > +static int ax45mp_l2c_probe(struct platform_device *pdev) > +{ > + struct device_node *np = pdev->dev.of_node; > + int ret; > + > + ax45mp_priv = devm_kzalloc(&pdev->dev, sizeof(*ax45mp_priv), GFP_KERNEL); > + if (!ax45mp_priv) > + return -ENOMEM; > + > + ax45mp_priv->l2c_base = devm_of_iomap(&pdev->dev, pdev->dev.of_node, 0, NULL); > + if (!ax45mp_priv->l2c_base) { > + ret = -ENOMEM; > + goto l2c_err; > + } > + > + ret = ax45mp_configure_l2_cache(np); > + if (ret) > + goto l2c_err; > + > + ret = ax45mp_configure_pma_regions(np); > + if (ret) > + goto l2c_err; > + > + static_branch_disable(&ax45mp_l2c_configured); > + > + return 0; > + > +l2c_err: > + devm_kfree(&pdev->dev, ax45mp_priv); > + ax45mp_priv = NULL; > + return ret; > +} > diff --git a/drivers/soc/renesas/rzfive/ax45mp_sbi.h b/drivers/soc/renesas/rzfive/ax45mp_sbi.h > new file mode 100644 > index 000000000000..1604874954d0 > --- /dev/null > +++ b/drivers/soc/renesas/rzfive/ax45mp_sbi.h > @@ -0,0 +1,29 @@ > +/* SPDX-License-Identifier: GPL-2.0+ */ > + > +#ifndef __AX45MP_SBI_H > +#define __AX45MP_SBI_H > + > +#define SBI_EXT_ANDES 0x0900031E > + > +enum ax45mp_sbi_ext_fid { > + AX45MP_SBI_EXT_GET_MCACHE_CTL_STATUS = 0, Is that zero not implied? > + AX45MP_SBI_EXT_GET_MMISC_CTL_STATUS, > + AX45MP_SBI_EXT_SET_MCACHE_CTL, > + AX45MP_SBI_EXT_SET_MMISC_CTL, > + AX45MP_SBI_EXT_ICACHE_OP, > + AX45MP_SBI_EXT_DCACHE_OP, > + AX45MP_SBI_EXT_L1CACHE_I_PREFETCH, > + AX45MP_SBI_EXT_L1CACHE_D_PREFETCH, > + AX45MP_SBI_EXT_NON_BLOCKING_LOAD_STORE, > + AX45MP_SBI_EXT_WRITE_AROUND, > + AX45MP_SBI_EXT_SET_PMA, > + AX45MP_SBI_EXT_FREE_PMA, > + AX45MP_SBI_EXT_PROBE_PMA, > + AX45MP_SBI_EXT_DCACHE_WBINVAL_ALL, > + AX45MP_SBI_EXT_GET_MICM_CTL_STATUS, > + AX45MP_SBI_EXT_GET_MDCM_CTL_STATUS, > + AX45MP_SBI_EXT_GET_MMSC_CTL_STATUS, > + AX45MP_SBI_EXT_GET_MISA_CTL_STATUS, > +}; > + > +#endif > -- > 2.25.1 >
On Thu, Nov 24, 2022 at 09:31:42PM +0000, Conor Dooley wrote: > On Thu, Nov 24, 2022 at 05:22:07PM +0000, Prabhakar wrote: > > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > > + ax45mp_priv->ucctl_ok = ax45mp_cpu_cache_controlable(); > That function name is a typo, should be called ax45mp_cpu_cache_cache_controllable(). And so is my suggestion! s/cache_cache/cache/
Hi Conor, Thank you for the review. On Thu, Nov 24, 2022 at 9:31 PM Conor Dooley <conor@kernel.org> wrote: > > On Thu, Nov 24, 2022 at 05:22:07PM +0000, Prabhakar wrote: > > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > > > > On the AX45MP core, cache coherency is a specification option so it may > > How about: > "Cache coherency is an option feature of the AX45MP core, so it may not > be supported." > Sure, I'll update that. > I keep finding that sentence kinda hard.. > > > In this case DMA will fail. > "The AX45MP predates the standard extensions for cache management, so an > alternate approach is required to support non-coherent DMA for SoCs > where this feature is not available, such as the Renesas RZ/Five." > > (you've gotta explain somewhere why this is in drivers/soc/renesas lol) > I dont want to touch that fiery topic ;) > > As a workaround, firstly we > > How about: > " Since the cache management instructions cannot be used, we instead > allocate..." > Sure, I'll update that. > > allocate a global dma coherent pool from which DMA allocations are taken > > and marked as non-cacheable + bufferable using the PMA region as specified > > in the device tree. Synchronization callbacks are implemented to > > synchronize when doing DMA transactions. > > > > The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA) > > block that allows dynamic adjustment of memory attributes in the runtime. > > It contains a configurable amount of PMA entries implemented as CSR > > registers to control the attributes of memory locations in interest. > > > > Below are the memory attributes supported: > > * Device, Non-bufferable > > * Device, bufferable > > * Memory, Non-cacheable, Non-bufferable > > * Memory, Non-cacheable, Bufferable > > * Memory, Write-back, No-allocate > > * Memory, Write-back, Read-allocate > > * Memory, Write-back, Write-allocate > > * Memory, Write-back, Read and Write-allocate > > > > This patch adds support to configure the memory attributes of the memory > > regions as passed from the l2 cache node and exposes the cache management > > ops. > > > > More info about PMA (section 10.3): > > Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf > > But yeah, this is basically the sort of stuff that'd be nice to have in > the previous patch! > Agreed, I'll include this in the binding patch too. > > Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > > --- > > RFC v3 -> v4 > > * Made use of runtime patching instead of compile time > > * Now just exposing single function ax45mp_no_iocp_cmo() for CMO handling > > * Added a check to make sure cache line size is always 64 bytes > > * Renamed folder rzf -> rzfive > > * Improved Kconfig description > > * Dropped L2 cache configuration > > * Dropped unnecessary casts > > * Fixed comments pointed by Geert, apart from use of PTR_ALIGN_XYZ() macros. > > --- > > arch/riscv/include/asm/cacheflush.h | 8 + > > arch/riscv/include/asm/errata_list.h | 32 +- > > drivers/soc/renesas/Kconfig | 7 + > > drivers/soc/renesas/Makefile | 2 + > > drivers/soc/renesas/rzfive/Kconfig | 6 + > > drivers/soc/renesas/rzfive/Makefile | 3 + > > drivers/soc/renesas/rzfive/ax45mp_cache.c | 415 ++++++++++++++++++++++ > > drivers/soc/renesas/rzfive/ax45mp_sbi.h | 29 ++ > > 8 files changed, 496 insertions(+), 6 deletions(-) > > create mode 100644 drivers/soc/renesas/rzfive/Kconfig > > create mode 100644 drivers/soc/renesas/rzfive/Makefile > > create mode 100644 drivers/soc/renesas/rzfive/ax45mp_cache.c > > create mode 100644 drivers/soc/renesas/rzfive/ax45mp_sbi.h > > > diff --git a/drivers/soc/renesas/Kconfig b/drivers/soc/renesas/Kconfig > > index 660498252ec5..e7810256c60d 100644 > > --- a/drivers/soc/renesas/Kconfig > > +++ b/drivers/soc/renesas/Kconfig > > @@ -340,9 +340,16 @@ if RISCV > > config ARCH_R9A07G043 > > bool "RISC-V Platform support for RZ/Five" > > select ARCH_RZG2L > > + select AX45MP_L2_CACHE > > + select DMA_GLOBAL_POOL > > + select ERRATA_ANDES > > + select ERRATA_ANDES_CMO > > + select RISCV_DMA_NONCOHERENT > > Is this not redundant due to the select by ERRATA_ANDES_CMO? > Indeed, I'll drop it. > > help > > This enables support for the Renesas RZ/Five SoC. > > > > +source "drivers/soc/renesas/rzfive/Kconfig" > > + > > endif # RISCV > > > > config RST_RCAR > > > diff --git a/drivers/soc/renesas/rzfive/Makefile b/drivers/soc/renesas/rzfive/Makefile > > new file mode 100644 > > index 000000000000..2012e7fb978d > > --- /dev/null > > +++ b/drivers/soc/renesas/rzfive/Makefile > > @@ -0,0 +1,3 @@ > > +# SPDX-License-Identifier: GPL-2.0 > > + > > +obj-$(CONFIG_AX45MP_L2_CACHE) += ax45mp_cache.o > > diff --git a/drivers/soc/renesas/rzfive/ax45mp_cache.c b/drivers/soc/renesas/rzfive/ax45mp_cache.c > > new file mode 100644 > > index 000000000000..4e0d0545d3af > > Mainly just whizzing through the driver itself.. > > > +static int ax45mp_configure_pma_regions(struct device_node *np) > > +{ > > + const char *propname = "andestech,pma-regions"; > > + u32 start, size, flags; > > + unsigned int entry_id; > > + unsigned int i; > > + int count; > > + int ret; > > + > > + count = of_property_count_elems_of_size(np, propname, sizeof(u32) * 3); > > + if (count < 0) > > + return count; > > + > > + if (count > AX45MP_MAX_PMA_REGIONS) > > + return -EINVAL; > > + > > + for (i = 0, entry_id = 0 ; entry_id < count ; i += 3, entry_id++) { > > + of_property_read_u32_index(np, propname, i, &start); > > + of_property_read_u32_index(np, propname, i + 1, &size); > > + of_property_read_u32_index(np, propname, i + 2, &flags); > > + ret = ax45mp_sbi_set_pma(start, size, flags, entry_id); > > + if (!ret) > > + pr_err("Failed to setup PMA region 0x%x - 0x%x flags: 0x%x", > > + start, start + size, flags); > > I have to ask - is it okay to just continue here if a RMA region setup > fails? > Ok so incase of failures "ebreak" is called, so continuing doesn't cause any harm I guess. > > + } > > + > > + return 0; > > +} > > + > > > +static bool ax45mp_cpu_cache_controlable(void) > > +{ > > + return (((ax45mp_cpu_get_micm_cfg_status() & AX45MP_MICM_CFG_ISZ_MASK) || > > + (ax45mp_cpu_get_mdcm_cfg_status() & AX45MP_MDCM_CFG_DSZ_MASK)) && > > + (ax45mp_cpu_get_misa_cfg_status() & AX45MP_MISA_20_MASK) && > > + (ax45mp_cpu_get_mmsc_cfg_status() & AX45MP_MMSC_CFG_CCTLCSR_MASK) && > > + (ax45mp_cpu_get_mcache_ctl_status() & AX45MP_MCACHE_CTL_CCTL_SUEN_MASK)); > > That's a bit of a mouthful lol! > > > > +static void ax45mp_cpu_dma_inval_range(void *vaddr, size_t size) > > Not mine to look after so /shrug but this looks like the sort of thing > that could do with a comment or two explaining the invalidation process. > Sure, I'll add some comments. > > +{ > > + char cache_buf[2][AX45MP_MAX_CACHE_LINE_SIZE]; > > + unsigned long start = (unsigned long)vaddr; > > + unsigned long end = start + size; > > + unsigned long old_start = start; > > + unsigned long old_end = end; > > + unsigned long line_size; > > + unsigned long flags; > > + > > + if (static_branch_unlikely(&ax45mp_l2c_configured) && !ax45mp_priv) > > + return; > > + > > + if (unlikely(start == end)) > > + return; > > + > > + line_size = ax45mp_priv->ax45mp_cache_line_size; > > + > > + memset(&cache_buf, 0x0, sizeof(cache_buf)); > > + start = start & (~(line_size - 1)); > > + end = ((end + line_size - 1) & (~(line_size - 1))); > > + > > + local_irq_save(flags); > > + if (unlikely(start != old_start)) > > + memcpy(&cache_buf[0][0], (void *)start, line_size); > > + > > + if (unlikely(end != old_end)) > > + memcpy(&cache_buf[1][0], (void *)(old_end & (~(line_size - 1))), line_size); > > + > > + ax45mp_cpu_dcache_inval_range(vaddr, (void *)end, line_size); > > + > > + if (unlikely(start != old_start)) > > + memcpy((void *)start, &cache_buf[0][0], (old_start & (line_size - 1))); > > + > > + if (unlikely(end != old_end)) > > + memcpy((void *)(old_end + 1), > > + &cache_buf[1][(old_end & (line_size - 1)) + 1], > > + end - old_end - 1); > > + > > + local_irq_restore(flags); > > +} > > > +static int ax45mp_configure_l2_cache(struct device_node *np) > > +{ > > + int ret; > > + > > + ret = of_property_read_u32(np, "cache-line-size", &ax45mp_priv->ax45mp_cache_line_size); > > + if (ret) { > > + pr_err("Failed to get cache-line-size defaulting to 64 bytes\n"); > ^ > Looks like you need a comma here... > OK. > > + ax45mp_priv->ax45mp_cache_line_size = SZ_64; > > + } > > + > > + if (ax45mp_priv->ax45mp_cache_line_size != SZ_64) { > > + pr_err("Expected cache-line-size to 64 bytes (found:%u). Defaulting to 64 bytes\n", > ^ > ...and a "be" here. > > Would you also benefit from a pr_fmt here since you have no device? Or > else you could save the dev to your ax45mp_priv and avail of dev_err > here? > Sure, I'll switch to dev_err() > > + ax45mp_priv->ax45mp_cache_line_size); > > + ax45mp_priv->ax45mp_cache_line_size = SZ_64; > > + } > > + > > + ax45mp_priv->ucctl_ok = ax45mp_cpu_cache_controlable(); > ^ > That function name is a typo, should be called ax45mp_cpu_cache_cache_controllable(). > Ok, I'll rename it to ax45mp_cpu_cache_controllable(). > > + ax45mp_priv->l2cache_enabled = ax45mp_cpu_l2c_ctl_status() & AX45MP_L2_CACHE_CTL_CEN_MASK; > > + > > + return 0; > > +} > > + > > +static int ax45mp_l2c_probe(struct platform_device *pdev) > > +{ > > + struct device_node *np = pdev->dev.of_node; > > + int ret; > > + > > + ax45mp_priv = devm_kzalloc(&pdev->dev, sizeof(*ax45mp_priv), GFP_KERNEL); > > + if (!ax45mp_priv) > > + return -ENOMEM; > > + > > + ax45mp_priv->l2c_base = devm_of_iomap(&pdev->dev, pdev->dev.of_node, 0, NULL); > > + if (!ax45mp_priv->l2c_base) { > > + ret = -ENOMEM; > > + goto l2c_err; > > + } > > + > > + ret = ax45mp_configure_l2_cache(np); > > + if (ret) > > + goto l2c_err; > > + > > + ret = ax45mp_configure_pma_regions(np); > > + if (ret) > > + goto l2c_err; > > + > > + static_branch_disable(&ax45mp_l2c_configured); > > + > > + return 0; > > + > > +l2c_err: > > + devm_kfree(&pdev->dev, ax45mp_priv); > > + ax45mp_priv = NULL; > > + return ret; > > +} > > > diff --git a/drivers/soc/renesas/rzfive/ax45mp_sbi.h b/drivers/soc/renesas/rzfive/ax45mp_sbi.h > > new file mode 100644 > > index 000000000000..1604874954d0 > > --- /dev/null > > +++ b/drivers/soc/renesas/rzfive/ax45mp_sbi.h > > @@ -0,0 +1,29 @@ > > +/* SPDX-License-Identifier: GPL-2.0+ */ > > + > > +#ifndef __AX45MP_SBI_H > > +#define __AX45MP_SBI_H > > + > > +#define SBI_EXT_ANDES 0x0900031E > > + > > +enum ax45mp_sbi_ext_fid { > > + AX45MP_SBI_EXT_GET_MCACHE_CTL_STATUS = 0, > > Is that zero not implied? > I had a feeling we always had to explicitly specify in the Linux coding standard. Cheers, Prabhakar
On Fri, Nov 25, 2022 at 10:50:01AM +0000, Lad, Prabhakar wrote: > Hi Conor, > > Thank you for the review. > > On Thu, Nov 24, 2022 at 9:31 PM Conor Dooley <conor@kernel.org> wrote: > > But yeah, this is basically the sort of stuff that'd be nice to have in > > the previous patch! > > > Agreed, I'll include this in the binding patch too. I said "the previous patch" but I meant "the previous patch that I commented on the commit message for". I can hardly expect telepathy, so sorry for the poor wording.
Hi Prabhakar, On 11/24/22 11:22, Prabhakar wrote: > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > > On the AX45MP core, cache coherency is a specification option so it may > not be supported. In this case DMA will fail. As a workaround, firstly we > allocate a global dma coherent pool from which DMA allocations are taken > and marked as non-cacheable + bufferable using the PMA region as specified > in the device tree. Synchronization callbacks are implemented to > synchronize when doing DMA transactions. > > The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA) > block that allows dynamic adjustment of memory attributes in the runtime. > It contains a configurable amount of PMA entries implemented as CSR > registers to control the attributes of memory locations in interest. > > Below are the memory attributes supported: > * Device, Non-bufferable > * Device, bufferable > * Memory, Non-cacheable, Non-bufferable > * Memory, Non-cacheable, Bufferable > * Memory, Write-back, No-allocate > * Memory, Write-back, Read-allocate > * Memory, Write-back, Write-allocate > * Memory, Write-back, Read and Write-allocate > > This patch adds support to configure the memory attributes of the memory > regions as passed from the l2 cache node and exposes the cache management > ops. Forgive my ignorance, but why do you need both a DMA pool and explicit cache maintenance? Wouldn't the purpose of marking a memory region as permanently non-cacheable be to avoid cache maintenance? And likewise, if you are doing cache maintenance anyway, why does it matter if/how the memory is cacheable? > More info about PMA (section 10.3): > Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf > > Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > --- > RFC v3 -> v4 > * Made use of runtime patching instead of compile time > * Now just exposing single function ax45mp_no_iocp_cmo() for CMO handling > * Added a check to make sure cache line size is always 64 bytes > * Renamed folder rzf -> rzfive > * Improved Kconfig description > * Dropped L2 cache configuration > * Dropped unnecessary casts > * Fixed comments pointed by Geert, apart from use of PTR_ALIGN_XYZ() macros. > --- > arch/riscv/include/asm/cacheflush.h | 8 + > arch/riscv/include/asm/errata_list.h | 32 +- > drivers/soc/renesas/Kconfig | 7 + > drivers/soc/renesas/Makefile | 2 + > drivers/soc/renesas/rzfive/Kconfig | 6 + > drivers/soc/renesas/rzfive/Makefile | 3 + > drivers/soc/renesas/rzfive/ax45mp_cache.c | 415 ++++++++++++++++++++++ > drivers/soc/renesas/rzfive/ax45mp_sbi.h | 29 ++ > 8 files changed, 496 insertions(+), 6 deletions(-) > create mode 100644 drivers/soc/renesas/rzfive/Kconfig > create mode 100644 drivers/soc/renesas/rzfive/Makefile > create mode 100644 drivers/soc/renesas/rzfive/ax45mp_cache.c > create mode 100644 drivers/soc/renesas/rzfive/ax45mp_sbi.h > > diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h > index 4a04d1be7c67..3226f3aceafe 100644 > --- a/arch/riscv/include/asm/cacheflush.h > +++ b/arch/riscv/include/asm/cacheflush.h > @@ -61,6 +61,14 @@ static inline void riscv_noncoherent_supported(void) {} > #define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL > #define SYS_RISCV_FLUSH_ICACHE_ALL (SYS_RISCV_FLUSH_ICACHE_LOCAL) > > +#ifdef CONFIG_AX45MP_L2_CACHE > +extern asmlinkage void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr, > + size_t size, int dir, int ops); > +#else > +inline void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr, > + size_t size, int dir, int ops) {} > +#endif > + > #include <asm-generic/cacheflush.h> > > #endif /* _ASM_RISCV_CACHEFLUSH_H */ > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h > index 48e899a8e7a9..300fed3bfd80 100644 > --- a/arch/riscv/include/asm/errata_list.h > +++ b/arch/riscv/include/asm/errata_list.h > @@ -125,8 +125,8 @@ asm volatile(ALTERNATIVE( \ > #define THEAD_SYNC_S ".long 0x0190000b" > > #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops) \ > -asm volatile(ALTERNATIVE_2( \ > - __nops(6), \ > +asm volatile(ALTERNATIVE_3( \ > + __nops(14), \ > "mv a0, %1\n\t" \ > "j 2f\n\t" \ > "3:\n\t" \ > @@ -134,7 +134,7 @@ asm volatile(ALTERNATIVE_2( \ > "add a0, a0, %0\n\t" \ > "2:\n\t" \ > "bltu a0, %2, 3b\n\t" \ > - "nop", 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ > + __nops(8), 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ > "mv a0, %1\n\t" \ > "j 2f\n\t" \ > "3:\n\t" \ > @@ -142,8 +142,28 @@ asm volatile(ALTERNATIVE_2( \ > "add a0, a0, %0\n\t" \ > "2:\n\t" \ > "bltu a0, %2, 3b\n\t" \ > - THEAD_SYNC_S, THEAD_VENDOR_ID, \ > - ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO) \ > + THEAD_SYNC_S "\n\t" \ > + __nops(8), THEAD_VENDOR_ID, \ > + ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO, \ > + ".option push\n\t\n\t" \ > + ".option norvc\n\t" \ > + ".option norelax\n\t" \ > + "addi sp,sp,-16\n\t" \ > + "sd s0,0(sp)\n\t" \ > + "sd ra,8(sp)\n\t" \ > + "addi s0,sp,16\n\t" \ > + "mv a4,%6\n\t" \ > + "mv a3,%5\n\t" \ > + "mv a2,%4\n\t" \ > + "mv a1,%3\n\t" \ > + "mv a0,%0\n\t" \ > + "call ax45mp_no_iocp_cmo\n\t" \ > + "ld ra,8(sp)\n\t" \ > + "ld s0,0(sp)\n\t" \ > + "addi sp,sp,16\n\t" \ > + ".option pop\n\t", \ > + ANDESTECH_VENDOR_ID, ERRATA_ANDESTECH_NO_IOCP, \ > + CONFIG_ERRATA_ANDES_CMO) \ > : : "r"(_cachesize), \ > "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ > "r"((unsigned long)(_start) + (_size)), \ > @@ -151,7 +171,7 @@ asm volatile(ALTERNATIVE_2( \ > "r"((unsigned long)(_size)), \ > "r"((unsigned long)(_dir)), \ > "r"((unsigned long)(_ops)) \ > - : "a0") > + : "a0", "a1", "a2", "a3", "a4", "memory") > > #define THEAD_C9XX_RV_IRQ_PMU 17 > #define THEAD_C9XX_CSR_SCOUNTEROF 0x5c5 > diff --git a/drivers/soc/renesas/Kconfig b/drivers/soc/renesas/Kconfig > index 660498252ec5..e7810256c60d 100644 > --- a/drivers/soc/renesas/Kconfig > +++ b/drivers/soc/renesas/Kconfig > @@ -340,9 +340,16 @@ if RISCV > config ARCH_R9A07G043 > bool "RISC-V Platform support for RZ/Five" > select ARCH_RZG2L > + select AX45MP_L2_CACHE > + select DMA_GLOBAL_POOL > + select ERRATA_ANDES > + select ERRATA_ANDES_CMO > + select RISCV_DMA_NONCOHERENT > help > This enables support for the Renesas RZ/Five SoC. > > +source "drivers/soc/renesas/rzfive/Kconfig" > + > endif # RISCV > > config RST_RCAR > diff --git a/drivers/soc/renesas/Makefile b/drivers/soc/renesas/Makefile > index 535868c9c7e4..9df9f759a039 100644 > --- a/drivers/soc/renesas/Makefile > +++ b/drivers/soc/renesas/Makefile > @@ -31,6 +31,8 @@ ifdef CONFIG_SMP > obj-$(CONFIG_ARCH_R9A06G032) += r9a06g032-smp.o > endif > > +obj-$(CONFIG_RISCV) += rzfive/ > + > # Family > obj-$(CONFIG_RST_RCAR) += rcar-rst.o > obj-$(CONFIG_SYSC_RCAR) += rcar-sysc.o > diff --git a/drivers/soc/renesas/rzfive/Kconfig b/drivers/soc/renesas/rzfive/Kconfig > new file mode 100644 > index 000000000000..b6bc00337d99 > --- /dev/null > +++ b/drivers/soc/renesas/rzfive/Kconfig > @@ -0,0 +1,6 @@ > +# SPDX-License-Identifier: GPL-2.0 > + > +config AX45MP_L2_CACHE > + bool "Andes Technology AX45MP L2 Cache controller" > + help > + Support for the L2 cache controller on Andes Technology AX45MP platforms. > diff --git a/drivers/soc/renesas/rzfive/Makefile b/drivers/soc/renesas/rzfive/Makefile > new file mode 100644 > index 000000000000..2012e7fb978d > --- /dev/null > +++ b/drivers/soc/renesas/rzfive/Makefile > @@ -0,0 +1,3 @@ > +# SPDX-License-Identifier: GPL-2.0 > + > +obj-$(CONFIG_AX45MP_L2_CACHE) += ax45mp_cache.o > diff --git a/drivers/soc/renesas/rzfive/ax45mp_cache.c b/drivers/soc/renesas/rzfive/ax45mp_cache.c > new file mode 100644 > index 000000000000..4e0d0545d3af > --- /dev/null > +++ b/drivers/soc/renesas/rzfive/ax45mp_cache.c > @@ -0,0 +1,415 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * PMA setup and non-coherent cache functions for Andes AX45MP > + * > + * Copyright (C) 2022 Renesas Electronics Corp. > + */ > + > +#include <linux/cacheflush.h> > +#include <linux/cacheinfo.h> > +#include <linux/dma-direction.h> > +#include <linux/of_address.h> > +#include <linux/of_platform.h> > + > +#include <asm/cacheflush.h> > +#include <asm/sbi.h> > + > +#include "ax45mp_sbi.h" > + > +/* L2 cache registers */ > +#define AX45MP_L2C_REG_CTL_OFFSET 0x8 > + > +#define AX45MP_L2C_REG_C0_CMD_OFFSET 0x40 > +#define AX45MP_L2C_REG_C0_ACC_OFFSET 0x48 > +#define AX45MP_L2C_REG_STATUS_OFFSET 0x80 > + > +/* D-cache operation */ > +#define AX45MP_CCTL_L1D_VA_INVAL 0 > +#define AX45MP_CCTL_L1D_VA_WB 1 > + > +/* L2 cache */ > +#define AX45MP_L2_CACHE_CTL_CEN_MASK 1 > + > +/* L2 CCTL status */ > +#define AX45MP_CCTL_L2_STATUS_IDLE 0 > + > +/* L2 CCTL status cores mask */ > +#define AX45MP_CCTL_L2_STATUS_C0_MASK 0xf > + > +/* L2 cache operation */ > +#define AX45MP_CCTL_L2_PA_INVAL 0x8 > +#define AX45MP_CCTL_L2_PA_WB 0x9 > + > +#define AX45MP_L2C_HPM_PER_CORE_OFFSET 0x8 > +#define AX45MP_L2C_REG_PER_CORE_OFFSET 0x10 > +#define AX45MP_CCTL_L2_STATUS_PER_CORE_OFFSET 4 > + > +#define AX45MP_L2C_REG_CN_CMD_OFFSET(n) \ > + (AX45MP_L2C_REG_C0_CMD_OFFSET + ((n) * AX45MP_L2C_REG_PER_CORE_OFFSET)) > +#define AX45MP_L2C_REG_CN_ACC_OFFSET(n) \ > + (AX45MP_L2C_REG_C0_ACC_OFFSET + ((n) * AX45MP_L2C_REG_PER_CORE_OFFSET)) > +#define AX45MP_CCTL_L2_STATUS_CN_MASK(n) \ > + (AX45MP_CCTL_L2_STATUS_C0_MASK << ((n) * AX45MP_CCTL_L2_STATUS_PER_CORE_OFFSET)) > + > +#define AX45MP_MICM_CFG_ISZ_OFFSET 6 > +#define AX45MP_MICM_CFG_ISZ_MASK (0x7 << AX45MP_MICM_CFG_ISZ_OFFSET) > + > +#define AX45MP_MDCM_CFG_DSZ_OFFSET 6 > +#define AX45MP_MDCM_CFG_DSZ_MASK (0x7 << AX45MP_MDCM_CFG_DSZ_OFFSET) > + > +#define AX45MP_CCTL_REG_UCCTLBEGINADDR_NUM 0x80b > +#define AX45MP_CCTL_REG_UCCTLCOMMAND_NUM 0x80c > + > +#define AX45MP_MCACHE_CTL_CCTL_SUEN_OFFSET 8 > +#define AX45MP_MMSC_CFG_CCTLCSR_OFFSET 16 > +#define AX45MP_MISA_20_OFFSET 20 > + > +#define AX45MP_MCACHE_CTL_CCTL_SUEN_MASK (0x1 << AX45MP_MCACHE_CTL_CCTL_SUEN_OFFSET) > +#define AX45MP_MMSC_CFG_CCTLCSR_MASK (0x1 << AX45MP_MMSC_CFG_CCTLCSR_OFFSET) > +#define AX45MP_MISA_20_MASK (0x1 << AX45MP_MISA_20_OFFSET) > + > +#define AX45MP_MAX_CACHE_LINE_SIZE 256 > + > +#define AX45MP_MAX_PMA_REGIONS 16 > + > +struct ax45mp_priv { > + void __iomem *l2c_base; > + u32 ax45mp_cache_line_size; > + bool l2cache_enabled; > + bool ucctl_ok; > +}; > + > +static struct ax45mp_priv *ax45mp_priv; > +static DEFINE_STATIC_KEY_FALSE(ax45mp_l2c_configured); > + > +/* PMA setup */ > +static long ax45mp_sbi_set_pma(unsigned long start, > + unsigned long size, > + unsigned long flags, > + unsigned int entry_id) > +{ > + struct sbiret ret; > + > + ret = sbi_ecall(SBI_EXT_ANDES, AX45MP_SBI_EXT_SET_PMA, > + start, size, entry_id, flags, 0, 0); > + > + return ret.value; > +} > + > +static int ax45mp_configure_pma_regions(struct device_node *np) > +{ > + const char *propname = "andestech,pma-regions"; > + u32 start, size, flags; > + unsigned int entry_id; > + unsigned int i; > + int count; > + int ret; > + > + count = of_property_count_elems_of_size(np, propname, sizeof(u32) * 3); > + if (count < 0) > + return count; > + > + if (count > AX45MP_MAX_PMA_REGIONS) > + return -EINVAL; > + > + for (i = 0, entry_id = 0 ; entry_id < count ; i += 3, entry_id++) { > + of_property_read_u32_index(np, propname, i, &start); > + of_property_read_u32_index(np, propname, i + 1, &size); > + of_property_read_u32_index(np, propname, i + 2, &flags); > + ret = ax45mp_sbi_set_pma(start, size, flags, entry_id); > + if (!ret) > + pr_err("Failed to setup PMA region 0x%x - 0x%x flags: 0x%x", > + start, start + size, flags); > + } > + > + return 0; > +} If firmware support is required to set up these PMA regions, why is Linux doing this at all? The firmware has access to the devicetree as well. It can set this up before entering S-mode, and then you don't need to expose this capability via an SBI extension. In fact, firmware could generate the reserved-memory node based on these regions at runtime (or vice versa). > + > +/* L2 Cache operations */ > +static uint32_t ax45mp_cpu_get_mcache_ctl_status(void) > +{ > + struct sbiret ret; > + > + ret = sbi_ecall(SBI_EXT_ANDES, AX45MP_SBI_EXT_GET_MCACHE_CTL_STATUS, > + 0, 0, 0, 0, 0, 0); > + return ret.value; > +} > + > +static uint32_t ax45mp_cpu_get_micm_cfg_status(void) > +{ > + struct sbiret ret; > + > + ret = sbi_ecall(SBI_EXT_ANDES, AX45MP_SBI_EXT_GET_MICM_CTL_STATUS, > + 0, 0, 0, 0, 0, 0); > + return ret.value; > +} > + > +static uint32_t ax45mp_cpu_get_mdcm_cfg_status(void) > +{ > + struct sbiret ret; > + > + ret = sbi_ecall(SBI_EXT_ANDES, AX45MP_SBI_EXT_GET_MDCM_CTL_STATUS, > + 0, 0, 0, 0, 0, 0); > + return ret.value; > +} > + > +static uint32_t ax45mp_cpu_get_mmsc_cfg_status(void) > +{ > + struct sbiret ret; > + > + ret = sbi_ecall(SBI_EXT_ANDES, AX45MP_SBI_EXT_GET_MMSC_CTL_STATUS, > + 0, 0, 0, 0, 0, 0); > + return ret.value; > +} > + > +static uint32_t ax45mp_cpu_get_misa_cfg_status(void) > +{ > + struct sbiret ret; > + > + ret = sbi_ecall(SBI_EXT_ANDES, AX45MP_SBI_EXT_GET_MISA_CTL_STATUS, > + 0, 0, 0, 0, 0, 0); > + return ret.value; > +} > + > +static inline uint32_t ax45mp_cpu_l2c_get_cctl_status(void) > +{ > + return readl(ax45mp_priv->l2c_base + AX45MP_L2C_REG_STATUS_OFFSET); > +} > + > +static inline uint32_t ax45mp_cpu_l2c_ctl_status(void) > +{ > + return readl(ax45mp_priv->l2c_base + AX45MP_L2C_REG_CTL_OFFSET); > +} > + > +static bool ax45mp_cpu_cache_controlable(void) > +{ > + return (((ax45mp_cpu_get_micm_cfg_status() & AX45MP_MICM_CFG_ISZ_MASK) || > + (ax45mp_cpu_get_mdcm_cfg_status() & AX45MP_MDCM_CFG_DSZ_MASK)) && > + (ax45mp_cpu_get_misa_cfg_status() & AX45MP_MISA_20_MASK) && > + (ax45mp_cpu_get_mmsc_cfg_status() & AX45MP_MMSC_CFG_CCTLCSR_MASK) && > + (ax45mp_cpu_get_mcache_ctl_status() & AX45MP_MCACHE_CTL_CCTL_SUEN_MASK)); > +} > + > +static void ax45mp_cpu_dcache_wb_range(void *start, void *end, int line_size) > +{ > + void __iomem *base = ax45mp_priv->l2c_base; > + unsigned long pa; > + int mhartid = 0; > +#ifdef CONFIG_SMP > + mhartid = smp_processor_id(); > +#endif This doesn't need an #ifdef. smp_processor_id() already returns zero when SMP is disabled. > + > + while (end > start) { > + if (ax45mp_priv->ucctl_ok) { > + csr_write(AX45MP_CCTL_REG_UCCTLBEGINADDR_NUM, start); > + csr_write(AX45MP_CCTL_REG_UCCTLCOMMAND_NUM, AX45MP_CCTL_L1D_VA_WB); > + } > + > + if (ax45mp_priv->l2cache_enabled) { > + pa = virt_to_phys(start); > + writel(pa, base + AX45MP_L2C_REG_CN_ACC_OFFSET(mhartid)); > + writel(AX45MP_CCTL_L2_PA_WB, > + base + AX45MP_L2C_REG_CN_CMD_OFFSET(mhartid)); > + while ((ax45mp_cpu_l2c_get_cctl_status() & > + AX45MP_CCTL_L2_STATUS_CN_MASK(mhartid)) != > + AX45MP_CCTL_L2_STATUS_IDLE) > + ; > + } > + > + start += line_size; > + } > +} > + > +static void ax45mp_cpu_dcache_inval_range(void *start, void *end, int line_size) > +{ > + void __iomem *base = ax45mp_priv->l2c_base; > + unsigned long pa; > + int mhartid = 0; > +#ifdef CONFIG_SMP > + mhartid = smp_processor_id(); > +#endif > + > + while (end > start) { > + if (ax45mp_priv->ucctl_ok) { > + csr_write(AX45MP_CCTL_REG_UCCTLBEGINADDR_NUM, start); > + csr_write(AX45MP_CCTL_REG_UCCTLCOMMAND_NUM, AX45MP_CCTL_L1D_VA_INVAL); > + } > + > + if (ax45mp_priv->l2cache_enabled) { > + pa = virt_to_phys(start); > + writel(pa, base + AX45MP_L2C_REG_CN_ACC_OFFSET(mhartid)); > + writel(AX45MP_CCTL_L2_PA_INVAL, > + base + AX45MP_L2C_REG_CN_CMD_OFFSET(mhartid)); > + while ((ax45mp_cpu_l2c_get_cctl_status() & > + AX45MP_CCTL_L2_STATUS_CN_MASK(mhartid)) != > + AX45MP_CCTL_L2_STATUS_IDLE) > + ; > + } > + > + start += line_size; > + } > +} > + > +static void ax45mp_cpu_dma_inval_range(void *vaddr, size_t size) > +{ > + char cache_buf[2][AX45MP_MAX_CACHE_LINE_SIZE]; > + unsigned long start = (unsigned long)vaddr; > + unsigned long end = start + size; > + unsigned long old_start = start; > + unsigned long old_end = end; > + unsigned long line_size; > + unsigned long flags; > + > + if (static_branch_unlikely(&ax45mp_l2c_configured) && !ax45mp_priv) > + return; > + > + if (unlikely(start == end)) > + return; > + > + line_size = ax45mp_priv->ax45mp_cache_line_size; > + > + memset(&cache_buf, 0x0, sizeof(cache_buf)); > + start = start & (~(line_size - 1)); > + end = ((end + line_size - 1) & (~(line_size - 1))); > + > + local_irq_save(flags); > + if (unlikely(start != old_start)) > + memcpy(&cache_buf[0][0], (void *)start, line_size); > + > + if (unlikely(end != old_end)) > + memcpy(&cache_buf[1][0], (void *)(old_end & (~(line_size - 1))), line_size); The memcpy dance is only required if ax45mp_cache_line_size is larger than ARCH_DMA_MINALIGN. Is that actually the case in practice? If not, you could verify this in the probe function, and remove this logic. > + > + ax45mp_cpu_dcache_inval_range(vaddr, (void *)end, line_size); > + > + if (unlikely(start != old_start)) > + memcpy((void *)start, &cache_buf[0][0], (old_start & (line_size - 1))); > + > + if (unlikely(end != old_end)) > + memcpy((void *)(old_end + 1), > + &cache_buf[1][(old_end & (line_size - 1)) + 1], > + end - old_end - 1); > + > + local_irq_restore(flags); > +} > + > +static void ax45mp_cpu_dma_wb_range(void *vaddr, size_t size) > +{ > + unsigned long start = (unsigned long)vaddr; > + unsigned long end = start + size; > + unsigned long line_size; > + unsigned long flags; > + > + if (static_branch_unlikely(&ax45mp_l2c_configured) && !ax45mp_priv) > + return; > + > + line_size = ax45mp_priv->ax45mp_cache_line_size; > + local_irq_save(flags); > + start = start & (~(line_size - 1)); > + ax45mp_cpu_dcache_wb_range(vaddr, (void *)end, line_size); > + local_irq_restore(flags); > +} > + > +void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr, size_t size, int dir, int ops) > +{ > + if (ops == NON_COHERENT_DMA_PREP) > + return; > + > + if (ops == NON_COHERENT_SYNC_DMA_FOR_DEVICE) { > + switch (dir) { > + case DMA_FROM_DEVICE: > + ax45mp_cpu_dma_inval_range(vaddr, size); > + break; > + case DMA_TO_DEVICE: > + case DMA_BIDIRECTIONAL: > + ax45mp_cpu_dma_wb_range(vaddr, size); > + break; > + default: > + break; > + } > + return; > + } > + > + /* op == NON_COHERENT_SYNC_DMA_FOR_CPU */ > + if (dir == DMA_BIDIRECTIONAL || dir == DMA_FROM_DEVICE) > + ax45mp_cpu_dma_inval_range(vaddr, size); > +} > +EXPORT_SYMBOL(ax45mp_no_iocp_cmo); > + > +static int ax45mp_configure_l2_cache(struct device_node *np) > +{ > + int ret; > + > + ret = of_property_read_u32(np, "cache-line-size", &ax45mp_priv->ax45mp_cache_line_size); > + if (ret) { > + pr_err("Failed to get cache-line-size defaulting to 64 bytes\n"); > + ax45mp_priv->ax45mp_cache_line_size = SZ_64; > + } > + > + if (ax45mp_priv->ax45mp_cache_line_size != SZ_64) { > + pr_err("Expected cache-line-size to 64 bytes (found:%u). Defaulting to 64 bytes\n", > + ax45mp_priv->ax45mp_cache_line_size); > + ax45mp_priv->ax45mp_cache_line_size = SZ_64; > + } Ah, so you already do this. And SZ_64 == ARCH_DMA_MINALIGN. So you do not need the memcpy logic. > + > + ax45mp_priv->ucctl_ok = ax45mp_cpu_cache_controlable(); > + ax45mp_priv->l2cache_enabled = ax45mp_cpu_l2c_ctl_status() & AX45MP_L2_CACHE_CTL_CEN_MASK; > + > + return 0; > +} > + > +static int ax45mp_l2c_probe(struct platform_device *pdev) > +{ > + struct device_node *np = pdev->dev.of_node; > + int ret; > + > + ax45mp_priv = devm_kzalloc(&pdev->dev, sizeof(*ax45mp_priv), GFP_KERNEL); > + if (!ax45mp_priv) > + return -ENOMEM; > + > + ax45mp_priv->l2c_base = devm_of_iomap(&pdev->dev, pdev->dev.of_node, 0, NULL); devm_platform_ioremap_resource() > + if (!ax45mp_priv->l2c_base) { > + ret = -ENOMEM; > + goto l2c_err; > + } > + > + ret = ax45mp_configure_l2_cache(np); > + if (ret) > + goto l2c_err; > + > + ret = ax45mp_configure_pma_regions(np); > + if (ret) > + goto l2c_err; > + > + static_branch_disable(&ax45mp_l2c_configured); Instead of enabling this before the probe function, and disabling it afterward, just enable it once here, in the success case. Then you can drop the !ax45mp_priv check in the functions above. And none of the functions would get called anyway if the alternative is not applied. I suppose it's not possible to do some of this probe logic in the alternative check function? > + > + return 0; > + > +l2c_err: > + devm_kfree(&pdev->dev, ax45mp_priv); > + ax45mp_priv = NULL; None of this cleanup is necessary. Regards, Samuel > + return ret; > +} > + > +static const struct of_device_id ax45mp_cache_ids[] = { > + { .compatible = "andestech,ax45mp-cache" }, > + { /* sentinel */ } > +}; > + > +static struct platform_driver ax45mp_l2c_driver = { > + .driver = { > + .name = "ax45mp-l2c", > + .of_match_table = ax45mp_cache_ids, > + }, > + .probe = ax45mp_l2c_probe, > +}; > + > +static int __init ax45mp_cache_init(void) > +{ > + static_branch_enable(&ax45mp_l2c_configured); > + return platform_driver_register(&ax45mp_l2c_driver); > +} > +arch_initcall(ax45mp_cache_init); > + > +MODULE_AUTHOR("Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>"); > +MODULE_DESCRIPTION("Andes AX45MP L2 cache driver"); > +MODULE_LICENSE("GPL"); > diff --git a/drivers/soc/renesas/rzfive/ax45mp_sbi.h b/drivers/soc/renesas/rzfive/ax45mp_sbi.h > new file mode 100644 > index 000000000000..1604874954d0 > --- /dev/null > +++ b/drivers/soc/renesas/rzfive/ax45mp_sbi.h > @@ -0,0 +1,29 @@ > +/* SPDX-License-Identifier: GPL-2.0+ */ > + > +#ifndef __AX45MP_SBI_H > +#define __AX45MP_SBI_H > + > +#define SBI_EXT_ANDES 0x0900031E > + > +enum ax45mp_sbi_ext_fid { > + AX45MP_SBI_EXT_GET_MCACHE_CTL_STATUS = 0, > + AX45MP_SBI_EXT_GET_MMISC_CTL_STATUS, > + AX45MP_SBI_EXT_SET_MCACHE_CTL, > + AX45MP_SBI_EXT_SET_MMISC_CTL, > + AX45MP_SBI_EXT_ICACHE_OP, > + AX45MP_SBI_EXT_DCACHE_OP, > + AX45MP_SBI_EXT_L1CACHE_I_PREFETCH, > + AX45MP_SBI_EXT_L1CACHE_D_PREFETCH, > + AX45MP_SBI_EXT_NON_BLOCKING_LOAD_STORE, > + AX45MP_SBI_EXT_WRITE_AROUND, > + AX45MP_SBI_EXT_SET_PMA, > + AX45MP_SBI_EXT_FREE_PMA, > + AX45MP_SBI_EXT_PROBE_PMA, > + AX45MP_SBI_EXT_DCACHE_WBINVAL_ALL, > + AX45MP_SBI_EXT_GET_MICM_CTL_STATUS, > + AX45MP_SBI_EXT_GET_MDCM_CTL_STATUS, > + AX45MP_SBI_EXT_GET_MMSC_CTL_STATUS, > + AX45MP_SBI_EXT_GET_MISA_CTL_STATUS, > +}; > + > +#endif
Hi Samuel, Thank you for the review. On Fri, Nov 25, 2022 at 7:43 PM Samuel Holland <samuel@sholland.org> wrote: > > Hi Prabhakar, > > On 11/24/22 11:22, Prabhakar wrote: > > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > > > > On the AX45MP core, cache coherency is a specification option so it may > > not be supported. In this case DMA will fail. As a workaround, firstly we > > allocate a global dma coherent pool from which DMA allocations are taken > > and marked as non-cacheable + bufferable using the PMA region as specified > > in the device tree. Synchronization callbacks are implemented to > > synchronize when doing DMA transactions. > > > > The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA) > > block that allows dynamic adjustment of memory attributes in the runtime. > > It contains a configurable amount of PMA entries implemented as CSR > > registers to control the attributes of memory locations in interest. > > > > Below are the memory attributes supported: > > * Device, Non-bufferable > > * Device, bufferable > > * Memory, Non-cacheable, Non-bufferable > > * Memory, Non-cacheable, Bufferable > > * Memory, Write-back, No-allocate > > * Memory, Write-back, Read-allocate > > * Memory, Write-back, Write-allocate > > * Memory, Write-back, Read and Write-allocate > > > > This patch adds support to configure the memory attributes of the memory > > regions as passed from the l2 cache node and exposes the cache management > > ops. > > Forgive my ignorance, but why do you need both a DMA pool and explicit > cache maintenance? Wouldn't the purpose of marking a memory region as > permanently non-cacheable be to avoid cache maintenance? And likewise, > if you are doing cache maintenance anyway, why does it matter if/how the > memory is cacheable? > "Memory, Non-cacheable, Bufferable" raises an AXI signal for transactions hence needing SW implementation for cache maintenance. > > More info about PMA (section 10.3): > > Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf > > > > Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > > --- > > RFC v3 -> v4 > > * Made use of runtime patching instead of compile time > > * Now just exposing single function ax45mp_no_iocp_cmo() for CMO handling > > * Added a check to make sure cache line size is always 64 bytes > > * Renamed folder rzf -> rzfive > > * Improved Kconfig description > > * Dropped L2 cache configuration > > * Dropped unnecessary casts > > * Fixed comments pointed by Geert, apart from use of PTR_ALIGN_XYZ() macros. > > --- > > arch/riscv/include/asm/cacheflush.h | 8 + > > arch/riscv/include/asm/errata_list.h | 32 +- > > drivers/soc/renesas/Kconfig | 7 + > > drivers/soc/renesas/Makefile | 2 + > > drivers/soc/renesas/rzfive/Kconfig | 6 + > > drivers/soc/renesas/rzfive/Makefile | 3 + > > drivers/soc/renesas/rzfive/ax45mp_cache.c | 415 ++++++++++++++++++++++ > > drivers/soc/renesas/rzfive/ax45mp_sbi.h | 29 ++ > > 8 files changed, 496 insertions(+), 6 deletions(-) > > create mode 100644 drivers/soc/renesas/rzfive/Kconfig > > create mode 100644 drivers/soc/renesas/rzfive/Makefile > > create mode 100644 drivers/soc/renesas/rzfive/ax45mp_cache.c > > create mode 100644 drivers/soc/renesas/rzfive/ax45mp_sbi.h > > > > diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h > > index 4a04d1be7c67..3226f3aceafe 100644 > > --- a/arch/riscv/include/asm/cacheflush.h > > +++ b/arch/riscv/include/asm/cacheflush.h > > @@ -61,6 +61,14 @@ static inline void riscv_noncoherent_supported(void) {} > > #define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL > > #define SYS_RISCV_FLUSH_ICACHE_ALL (SYS_RISCV_FLUSH_ICACHE_LOCAL) > > > > +#ifdef CONFIG_AX45MP_L2_CACHE > > +extern asmlinkage void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr, > > + size_t size, int dir, int ops); > > +#else > > +inline void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr, > > + size_t size, int dir, int ops) {} > > +#endif > > + > > #include <asm-generic/cacheflush.h> > > > > #endif /* _ASM_RISCV_CACHEFLUSH_H */ > > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h > > index 48e899a8e7a9..300fed3bfd80 100644 > > --- a/arch/riscv/include/asm/errata_list.h > > +++ b/arch/riscv/include/asm/errata_list.h > > @@ -125,8 +125,8 @@ asm volatile(ALTERNATIVE( \ > > #define THEAD_SYNC_S ".long 0x0190000b" > > > > #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops) \ > > -asm volatile(ALTERNATIVE_2( \ > > - __nops(6), \ > > +asm volatile(ALTERNATIVE_3( \ > > + __nops(14), \ > > "mv a0, %1\n\t" \ > > "j 2f\n\t" \ > > "3:\n\t" \ > > @@ -134,7 +134,7 @@ asm volatile(ALTERNATIVE_2( \ > > "add a0, a0, %0\n\t" \ > > "2:\n\t" \ > > "bltu a0, %2, 3b\n\t" \ > > - "nop", 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ > > + __nops(8), 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ > > "mv a0, %1\n\t" \ > > "j 2f\n\t" \ > > "3:\n\t" \ > > @@ -142,8 +142,28 @@ asm volatile(ALTERNATIVE_2( \ > > "add a0, a0, %0\n\t" \ > > "2:\n\t" \ > > "bltu a0, %2, 3b\n\t" \ > > - THEAD_SYNC_S, THEAD_VENDOR_ID, \ > > - ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO) \ > > + THEAD_SYNC_S "\n\t" \ > > + __nops(8), THEAD_VENDOR_ID, \ > > + ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO, \ > > + ".option push\n\t\n\t" \ > > + ".option norvc\n\t" \ > > + ".option norelax\n\t" \ > > + "addi sp,sp,-16\n\t" \ > > + "sd s0,0(sp)\n\t" \ > > + "sd ra,8(sp)\n\t" \ > > + "addi s0,sp,16\n\t" \ > > + "mv a4,%6\n\t" \ > > + "mv a3,%5\n\t" \ > > + "mv a2,%4\n\t" \ > > + "mv a1,%3\n\t" \ > > + "mv a0,%0\n\t" \ > > + "call ax45mp_no_iocp_cmo\n\t" \ > > + "ld ra,8(sp)\n\t" \ > > + "ld s0,0(sp)\n\t" \ > > + "addi sp,sp,16\n\t" \ > > + ".option pop\n\t", \ > > + ANDESTECH_VENDOR_ID, ERRATA_ANDESTECH_NO_IOCP, \ > > + CONFIG_ERRATA_ANDES_CMO) \ > > : : "r"(_cachesize), \ > > "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ > > "r"((unsigned long)(_start) + (_size)), \ > > @@ -151,7 +171,7 @@ asm volatile(ALTERNATIVE_2( \ > > "r"((unsigned long)(_size)), \ > > "r"((unsigned long)(_dir)), \ > > "r"((unsigned long)(_ops)) \ > > - : "a0") > > + : "a0", "a1", "a2", "a3", "a4", "memory") > > > > #define THEAD_C9XX_RV_IRQ_PMU 17 > > #define THEAD_C9XX_CSR_SCOUNTEROF 0x5c5 > > diff --git a/drivers/soc/renesas/Kconfig b/drivers/soc/renesas/Kconfig > > index 660498252ec5..e7810256c60d 100644 > > --- a/drivers/soc/renesas/Kconfig > > +++ b/drivers/soc/renesas/Kconfig > > @@ -340,9 +340,16 @@ if RISCV > > config ARCH_R9A07G043 > > bool "RISC-V Platform support for RZ/Five" > > select ARCH_RZG2L > > + select AX45MP_L2_CACHE > > + select DMA_GLOBAL_POOL > > + select ERRATA_ANDES > > + select ERRATA_ANDES_CMO > > + select RISCV_DMA_NONCOHERENT > > help > > This enables support for the Renesas RZ/Five SoC. > > > > +source "drivers/soc/renesas/rzfive/Kconfig" > > + > > endif # RISCV > > > > config RST_RCAR > > diff --git a/drivers/soc/renesas/Makefile b/drivers/soc/renesas/Makefile > > index 535868c9c7e4..9df9f759a039 100644 > > --- a/drivers/soc/renesas/Makefile > > +++ b/drivers/soc/renesas/Makefile > > @@ -31,6 +31,8 @@ ifdef CONFIG_SMP > > obj-$(CONFIG_ARCH_R9A06G032) += r9a06g032-smp.o > > endif > > > > +obj-$(CONFIG_RISCV) += rzfive/ > > + > > # Family > > obj-$(CONFIG_RST_RCAR) += rcar-rst.o > > obj-$(CONFIG_SYSC_RCAR) += rcar-sysc.o > > diff --git a/drivers/soc/renesas/rzfive/Kconfig b/drivers/soc/renesas/rzfive/Kconfig > > new file mode 100644 > > index 000000000000..b6bc00337d99 > > --- /dev/null > > +++ b/drivers/soc/renesas/rzfive/Kconfig > > @@ -0,0 +1,6 @@ > > +# SPDX-License-Identifier: GPL-2.0 > > + > > +config AX45MP_L2_CACHE > > + bool "Andes Technology AX45MP L2 Cache controller" > > + help > > + Support for the L2 cache controller on Andes Technology AX45MP platforms. > > diff --git a/drivers/soc/renesas/rzfive/Makefile b/drivers/soc/renesas/rzfive/Makefile > > new file mode 100644 > > index 000000000000..2012e7fb978d > > --- /dev/null > > +++ b/drivers/soc/renesas/rzfive/Makefile > > @@ -0,0 +1,3 @@ > > +# SPDX-License-Identifier: GPL-2.0 > > + > > +obj-$(CONFIG_AX45MP_L2_CACHE) += ax45mp_cache.o > > diff --git a/drivers/soc/renesas/rzfive/ax45mp_cache.c b/drivers/soc/renesas/rzfive/ax45mp_cache.c > > new file mode 100644 > > index 000000000000..4e0d0545d3af > > --- /dev/null > > +++ b/drivers/soc/renesas/rzfive/ax45mp_cache.c > > @@ -0,0 +1,415 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +/* > > + * PMA setup and non-coherent cache functions for Andes AX45MP > > + * > > + * Copyright (C) 2022 Renesas Electronics Corp. > > + */ > > + > > +#include <linux/cacheflush.h> > > +#include <linux/cacheinfo.h> > > +#include <linux/dma-direction.h> > > +#include <linux/of_address.h> > > +#include <linux/of_platform.h> > > + > > +#include <asm/cacheflush.h> > > +#include <asm/sbi.h> > > + > > +#include "ax45mp_sbi.h" > > + > > +/* L2 cache registers */ > > +#define AX45MP_L2C_REG_CTL_OFFSET 0x8 > > + > > +#define AX45MP_L2C_REG_C0_CMD_OFFSET 0x40 > > +#define AX45MP_L2C_REG_C0_ACC_OFFSET 0x48 > > +#define AX45MP_L2C_REG_STATUS_OFFSET 0x80 > > + > > +/* D-cache operation */ > > +#define AX45MP_CCTL_L1D_VA_INVAL 0 > > +#define AX45MP_CCTL_L1D_VA_WB 1 > > + > > +/* L2 cache */ > > +#define AX45MP_L2_CACHE_CTL_CEN_MASK 1 > > + > > +/* L2 CCTL status */ > > +#define AX45MP_CCTL_L2_STATUS_IDLE 0 > > + > > +/* L2 CCTL status cores mask */ > > +#define AX45MP_CCTL_L2_STATUS_C0_MASK 0xf > > + > > +/* L2 cache operation */ > > +#define AX45MP_CCTL_L2_PA_INVAL 0x8 > > +#define AX45MP_CCTL_L2_PA_WB 0x9 > > + > > +#define AX45MP_L2C_HPM_PER_CORE_OFFSET 0x8 > > +#define AX45MP_L2C_REG_PER_CORE_OFFSET 0x10 > > +#define AX45MP_CCTL_L2_STATUS_PER_CORE_OFFSET 4 > > + > > +#define AX45MP_L2C_REG_CN_CMD_OFFSET(n) \ > > + (AX45MP_L2C_REG_C0_CMD_OFFSET + ((n) * AX45MP_L2C_REG_PER_CORE_OFFSET)) > > +#define AX45MP_L2C_REG_CN_ACC_OFFSET(n) \ > > + (AX45MP_L2C_REG_C0_ACC_OFFSET + ((n) * AX45MP_L2C_REG_PER_CORE_OFFSET)) > > +#define AX45MP_CCTL_L2_STATUS_CN_MASK(n) \ > > + (AX45MP_CCTL_L2_STATUS_C0_MASK << ((n) * AX45MP_CCTL_L2_STATUS_PER_CORE_OFFSET)) > > + > > +#define AX45MP_MICM_CFG_ISZ_OFFSET 6 > > +#define AX45MP_MICM_CFG_ISZ_MASK (0x7 << AX45MP_MICM_CFG_ISZ_OFFSET) > > + > > +#define AX45MP_MDCM_CFG_DSZ_OFFSET 6 > > +#define AX45MP_MDCM_CFG_DSZ_MASK (0x7 << AX45MP_MDCM_CFG_DSZ_OFFSET) > > + > > +#define AX45MP_CCTL_REG_UCCTLBEGINADDR_NUM 0x80b > > +#define AX45MP_CCTL_REG_UCCTLCOMMAND_NUM 0x80c > > + > > +#define AX45MP_MCACHE_CTL_CCTL_SUEN_OFFSET 8 > > +#define AX45MP_MMSC_CFG_CCTLCSR_OFFSET 16 > > +#define AX45MP_MISA_20_OFFSET 20 > > + > > +#define AX45MP_MCACHE_CTL_CCTL_SUEN_MASK (0x1 << AX45MP_MCACHE_CTL_CCTL_SUEN_OFFSET) > > +#define AX45MP_MMSC_CFG_CCTLCSR_MASK (0x1 << AX45MP_MMSC_CFG_CCTLCSR_OFFSET) > > +#define AX45MP_MISA_20_MASK (0x1 << AX45MP_MISA_20_OFFSET) > > + > > +#define AX45MP_MAX_CACHE_LINE_SIZE 256 > > + > > +#define AX45MP_MAX_PMA_REGIONS 16 > > + > > +struct ax45mp_priv { > > + void __iomem *l2c_base; > > + u32 ax45mp_cache_line_size; > > + bool l2cache_enabled; > > + bool ucctl_ok; > > +}; > > + > > +static struct ax45mp_priv *ax45mp_priv; > > +static DEFINE_STATIC_KEY_FALSE(ax45mp_l2c_configured); > > + > > +/* PMA setup */ > > +static long ax45mp_sbi_set_pma(unsigned long start, > > + unsigned long size, > > + unsigned long flags, > > + unsigned int entry_id) > > +{ > > + struct sbiret ret; > > + > > + ret = sbi_ecall(SBI_EXT_ANDES, AX45MP_SBI_EXT_SET_PMA, > > + start, size, entry_id, flags, 0, 0); > > + > > + return ret.value; > > +} > > + > > +static int ax45mp_configure_pma_regions(struct device_node *np) > > +{ > > + const char *propname = "andestech,pma-regions"; > > + u32 start, size, flags; > > + unsigned int entry_id; > > + unsigned int i; > > + int count; > > + int ret; > > + > > + count = of_property_count_elems_of_size(np, propname, sizeof(u32) * 3); > > + if (count < 0) > > + return count; > > + > > + if (count > AX45MP_MAX_PMA_REGIONS) > > + return -EINVAL; > > + > > + for (i = 0, entry_id = 0 ; entry_id < count ; i += 3, entry_id++) { > > + of_property_read_u32_index(np, propname, i, &start); > > + of_property_read_u32_index(np, propname, i + 1, &size); > > + of_property_read_u32_index(np, propname, i + 2, &flags); > > + ret = ax45mp_sbi_set_pma(start, size, flags, entry_id); > > + if (!ret) > > + pr_err("Failed to setup PMA region 0x%x - 0x%x flags: 0x%x", > > + start, start + size, flags); > > + } > > + > > + return 0; > > +} > > If firmware support is required to set up these PMA regions, why is > Linux doing this at all? The firmware has access to the devicetree as > well. It can set this up before entering S-mode, and then you don't need > to expose this capability via an SBI extension. In fact, firmware could > generate the reserved-memory node based on these regions at runtime (or > vice versa). > That's a good point. I'll do some research on this and get back. Btw are there any existing examples where the firmware adds DT nodes? > > + > > +/* L2 Cache operations */ > > +static uint32_t ax45mp_cpu_get_mcache_ctl_status(void) > > +{ > > + struct sbiret ret; > > + > > + ret = sbi_ecall(SBI_EXT_ANDES, AX45MP_SBI_EXT_GET_MCACHE_CTL_STATUS, > > + 0, 0, 0, 0, 0, 0); > > + return ret.value; > > +} > > + > > +static uint32_t ax45mp_cpu_get_micm_cfg_status(void) > > +{ > > + struct sbiret ret; > > + > > + ret = sbi_ecall(SBI_EXT_ANDES, AX45MP_SBI_EXT_GET_MICM_CTL_STATUS, > > + 0, 0, 0, 0, 0, 0); > > + return ret.value; > > +} > > + > > +static uint32_t ax45mp_cpu_get_mdcm_cfg_status(void) > > +{ > > + struct sbiret ret; > > + > > + ret = sbi_ecall(SBI_EXT_ANDES, AX45MP_SBI_EXT_GET_MDCM_CTL_STATUS, > > + 0, 0, 0, 0, 0, 0); > > + return ret.value; > > +} > > + > > +static uint32_t ax45mp_cpu_get_mmsc_cfg_status(void) > > +{ > > + struct sbiret ret; > > + > > + ret = sbi_ecall(SBI_EXT_ANDES, AX45MP_SBI_EXT_GET_MMSC_CTL_STATUS, > > + 0, 0, 0, 0, 0, 0); > > + return ret.value; > > +} > > + > > +static uint32_t ax45mp_cpu_get_misa_cfg_status(void) > > +{ > > + struct sbiret ret; > > + > > + ret = sbi_ecall(SBI_EXT_ANDES, AX45MP_SBI_EXT_GET_MISA_CTL_STATUS, > > + 0, 0, 0, 0, 0, 0); > > + return ret.value; > > +} > > + > > +static inline uint32_t ax45mp_cpu_l2c_get_cctl_status(void) > > +{ > > + return readl(ax45mp_priv->l2c_base + AX45MP_L2C_REG_STATUS_OFFSET); > > +} > > + > > +static inline uint32_t ax45mp_cpu_l2c_ctl_status(void) > > +{ > > + return readl(ax45mp_priv->l2c_base + AX45MP_L2C_REG_CTL_OFFSET); > > +} > > + > > +static bool ax45mp_cpu_cache_controlable(void) > > +{ > > + return (((ax45mp_cpu_get_micm_cfg_status() & AX45MP_MICM_CFG_ISZ_MASK) || > > + (ax45mp_cpu_get_mdcm_cfg_status() & AX45MP_MDCM_CFG_DSZ_MASK)) && > > + (ax45mp_cpu_get_misa_cfg_status() & AX45MP_MISA_20_MASK) && > > + (ax45mp_cpu_get_mmsc_cfg_status() & AX45MP_MMSC_CFG_CCTLCSR_MASK) && > > + (ax45mp_cpu_get_mcache_ctl_status() & AX45MP_MCACHE_CTL_CCTL_SUEN_MASK)); > > +} > > + > > +static void ax45mp_cpu_dcache_wb_range(void *start, void *end, int line_size) > > +{ > > + void __iomem *base = ax45mp_priv->l2c_base; > > + unsigned long pa; > > + int mhartid = 0; > > +#ifdef CONFIG_SMP > > + mhartid = smp_processor_id(); > > +#endif > > This doesn't need an #ifdef. smp_processor_id() already returns zero > when SMP is disabled. > Ok, I'll get rid of this check. > > + > > + while (end > start) { > > + if (ax45mp_priv->ucctl_ok) { > > + csr_write(AX45MP_CCTL_REG_UCCTLBEGINADDR_NUM, start); > > + csr_write(AX45MP_CCTL_REG_UCCTLCOMMAND_NUM, AX45MP_CCTL_L1D_VA_WB); > > + } > > + > > + if (ax45mp_priv->l2cache_enabled) { > > + pa = virt_to_phys(start); > > + writel(pa, base + AX45MP_L2C_REG_CN_ACC_OFFSET(mhartid)); > > + writel(AX45MP_CCTL_L2_PA_WB, > > + base + AX45MP_L2C_REG_CN_CMD_OFFSET(mhartid)); > > + while ((ax45mp_cpu_l2c_get_cctl_status() & > > + AX45MP_CCTL_L2_STATUS_CN_MASK(mhartid)) != > > + AX45MP_CCTL_L2_STATUS_IDLE) > > + ; > > + } > > + > > + start += line_size; > > + } > > +} > > + > > +static void ax45mp_cpu_dcache_inval_range(void *start, void *end, int line_size) > > +{ > > + void __iomem *base = ax45mp_priv->l2c_base; > > + unsigned long pa; > > + int mhartid = 0; > > +#ifdef CONFIG_SMP > > + mhartid = smp_processor_id(); > > +#endif > > + > > + while (end > start) { > > + if (ax45mp_priv->ucctl_ok) { > > + csr_write(AX45MP_CCTL_REG_UCCTLBEGINADDR_NUM, start); > > + csr_write(AX45MP_CCTL_REG_UCCTLCOMMAND_NUM, AX45MP_CCTL_L1D_VA_INVAL); > > + } > > + > > + if (ax45mp_priv->l2cache_enabled) { > > + pa = virt_to_phys(start); > > + writel(pa, base + AX45MP_L2C_REG_CN_ACC_OFFSET(mhartid)); > > + writel(AX45MP_CCTL_L2_PA_INVAL, > > + base + AX45MP_L2C_REG_CN_CMD_OFFSET(mhartid)); > > + while ((ax45mp_cpu_l2c_get_cctl_status() & > > + AX45MP_CCTL_L2_STATUS_CN_MASK(mhartid)) != > > + AX45MP_CCTL_L2_STATUS_IDLE) > > + ; > > + } > > + > > + start += line_size; > > + } > > +} > > + > > +static void ax45mp_cpu_dma_inval_range(void *vaddr, size_t size) > > +{ > > + char cache_buf[2][AX45MP_MAX_CACHE_LINE_SIZE]; > > + unsigned long start = (unsigned long)vaddr; > > + unsigned long end = start + size; > > + unsigned long old_start = start; > > + unsigned long old_end = end; > > + unsigned long line_size; > > + unsigned long flags; > > + > > + if (static_branch_unlikely(&ax45mp_l2c_configured) && !ax45mp_priv) > > + return; > > + > > + if (unlikely(start == end)) > > + return; > > + > > + line_size = ax45mp_priv->ax45mp_cache_line_size; > > + > > + memset(&cache_buf, 0x0, sizeof(cache_buf)); > > + start = start & (~(line_size - 1)); > > + end = ((end + line_size - 1) & (~(line_size - 1))); > > + > > + local_irq_save(flags); > > + if (unlikely(start != old_start)) > > + memcpy(&cache_buf[0][0], (void *)start, line_size); > > + > > + if (unlikely(end != old_end)) > > + memcpy(&cache_buf[1][0], (void *)(old_end & (~(line_size - 1))), line_size); > > The memcpy dance is only required if ax45mp_cache_line_size is larger > than ARCH_DMA_MINALIGN. Is that actually the case in practice? If not, > you could verify this in the probe function, and remove this logic. > OK... > > + > > + ax45mp_cpu_dcache_inval_range(vaddr, (void *)end, line_size); > > + > > + if (unlikely(start != old_start)) > > + memcpy((void *)start, &cache_buf[0][0], (old_start & (line_size - 1))); > > + > > + if (unlikely(end != old_end)) > > + memcpy((void *)(old_end + 1), > > + &cache_buf[1][(old_end & (line_size - 1)) + 1], > > + end - old_end - 1); > > + > > + local_irq_restore(flags); > > +} > > + > > +static void ax45mp_cpu_dma_wb_range(void *vaddr, size_t size) > > +{ > > + unsigned long start = (unsigned long)vaddr; > > + unsigned long end = start + size; > > + unsigned long line_size; > > + unsigned long flags; > > + > > + if (static_branch_unlikely(&ax45mp_l2c_configured) && !ax45mp_priv) > > + return; > > + > > + line_size = ax45mp_priv->ax45mp_cache_line_size; > > + local_irq_save(flags); > > + start = start & (~(line_size - 1)); > > + ax45mp_cpu_dcache_wb_range(vaddr, (void *)end, line_size); > > + local_irq_restore(flags); > > +} > > + > > +void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr, size_t size, int dir, int ops) > > +{ > > + if (ops == NON_COHERENT_DMA_PREP) > > + return; > > + > > + if (ops == NON_COHERENT_SYNC_DMA_FOR_DEVICE) { > > + switch (dir) { > > + case DMA_FROM_DEVICE: > > + ax45mp_cpu_dma_inval_range(vaddr, size); > > + break; > > + case DMA_TO_DEVICE: > > + case DMA_BIDIRECTIONAL: > > + ax45mp_cpu_dma_wb_range(vaddr, size); > > + break; > > + default: > > + break; > > + } > > + return; > > + } > > + > > + /* op == NON_COHERENT_SYNC_DMA_FOR_CPU */ > > + if (dir == DMA_BIDIRECTIONAL || dir == DMA_FROM_DEVICE) > > + ax45mp_cpu_dma_inval_range(vaddr, size); > > +} > > +EXPORT_SYMBOL(ax45mp_no_iocp_cmo); > > + > > +static int ax45mp_configure_l2_cache(struct device_node *np) > > +{ > > + int ret; > > + > > + ret = of_property_read_u32(np, "cache-line-size", &ax45mp_priv->ax45mp_cache_line_size); > > + if (ret) { > > + pr_err("Failed to get cache-line-size defaulting to 64 bytes\n"); > > + ax45mp_priv->ax45mp_cache_line_size = SZ_64; > > + } > > + > > + if (ax45mp_priv->ax45mp_cache_line_size != SZ_64) { > > + pr_err("Expected cache-line-size to 64 bytes (found:%u). Defaulting to 64 bytes\n", > > + ax45mp_priv->ax45mp_cache_line_size); > > + ax45mp_priv->ax45mp_cache_line_size = SZ_64; > > + } > > Ah, so you already do this. And SZ_64 == ARCH_DMA_MINALIGN. So you do > not need the memcpy logic. > ... I did some initial testing and all seems to be OK. I'll get rid of that check. > > + > > + ax45mp_priv->ucctl_ok = ax45mp_cpu_cache_controlable(); > > + ax45mp_priv->l2cache_enabled = ax45mp_cpu_l2c_ctl_status() & AX45MP_L2_CACHE_CTL_CEN_MASK; > > + > > + return 0; > > +} > > + > > +static int ax45mp_l2c_probe(struct platform_device *pdev) > > +{ > > + struct device_node *np = pdev->dev.of_node; > > + int ret; > > + > > + ax45mp_priv = devm_kzalloc(&pdev->dev, sizeof(*ax45mp_priv), GFP_KERNEL); > > + if (!ax45mp_priv) > > + return -ENOMEM; > > + > > + ax45mp_priv->l2c_base = devm_of_iomap(&pdev->dev, pdev->dev.of_node, 0, NULL); > > devm_platform_ioremap_resource() > OK. > > + if (!ax45mp_priv->l2c_base) { > > + ret = -ENOMEM; > > + goto l2c_err; > > + } > > + > > + ret = ax45mp_configure_l2_cache(np); > > + if (ret) > > + goto l2c_err; > > + > > + ret = ax45mp_configure_pma_regions(np); > > + if (ret) > > + goto l2c_err; > > + > > + static_branch_disable(&ax45mp_l2c_configured); > > Instead of enabling this before the probe function, and disabling it > afterward, just enable it once here, in the success case. Then you can > drop the !ax45mp_priv check in the functions above. > I think I had tried it but static_branch_unlikely() was always returning true. > And none of the functions would get called anyway if the alternative is > not applied. I suppose it's not possible to do some of this probe logic > in the alternative check function? > you mean to check in the vendor errata patch function to see if this driver has probed? > > + > > + return 0; > > + > > +l2c_err: > > + devm_kfree(&pdev->dev, ax45mp_priv); > > + ax45mp_priv = NULL; > > None of this cleanup is necessary. > Agreed, I'll drop it. Cheers, Prabhakar
Hi Prabhakar, On Sat, Nov 26, 2022 at 10:10 PM Lad, Prabhakar <prabhakar.csengg@gmail.com> wrote: > On Fri, Nov 25, 2022 at 7:43 PM Samuel Holland <samuel@sholland.org> wrote: > > On 11/24/22 11:22, Prabhakar wrote: > > > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > > > > > > On the AX45MP core, cache coherency is a specification option so it may > > > not be supported. In this case DMA will fail. As a workaround, firstly we > > > allocate a global dma coherent pool from which DMA allocations are taken > > > and marked as non-cacheable + bufferable using the PMA region as specified > > > in the device tree. Synchronization callbacks are implemented to > > > synchronize when doing DMA transactions. > > > > > > The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA) > > > block that allows dynamic adjustment of memory attributes in the runtime. > > > It contains a configurable amount of PMA entries implemented as CSR > > > registers to control the attributes of memory locations in interest. > > > > > > Below are the memory attributes supported: > > > * Device, Non-bufferable > > > * Device, bufferable > > > * Memory, Non-cacheable, Non-bufferable > > > * Memory, Non-cacheable, Bufferable > > > * Memory, Write-back, No-allocate > > > * Memory, Write-back, Read-allocate > > > * Memory, Write-back, Write-allocate > > > * Memory, Write-back, Read and Write-allocate > > > > > > This patch adds support to configure the memory attributes of the memory > > > regions as passed from the l2 cache node and exposes the cache management > > > ops. > > > > Forgive my ignorance, but why do you need both a DMA pool and explicit > > cache maintenance? Wouldn't the purpose of marking a memory region as > > permanently non-cacheable be to avoid cache maintenance? And likewise, > > if you are doing cache maintenance anyway, why does it matter if/how the > > memory is cacheable? > > > "Memory, Non-cacheable, Bufferable" raises an AXI signal for > transactions hence needing SW implementation for cache maintenance. > > > > More info about PMA (section 10.3): > > > Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf > > > > > > Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > > > +static int ax45mp_configure_pma_regions(struct device_node *np) > > > +{ > > > + const char *propname = "andestech,pma-regions"; > > > + u32 start, size, flags; > > > + unsigned int entry_id; > > > + unsigned int i; > > > + int count; > > > + int ret; > > > + > > > + count = of_property_count_elems_of_size(np, propname, sizeof(u32) * 3); > > > + if (count < 0) > > > + return count; > > > + > > > + if (count > AX45MP_MAX_PMA_REGIONS) > > > + return -EINVAL; > > > + > > > + for (i = 0, entry_id = 0 ; entry_id < count ; i += 3, entry_id++) { > > > + of_property_read_u32_index(np, propname, i, &start); > > > + of_property_read_u32_index(np, propname, i + 1, &size); > > > + of_property_read_u32_index(np, propname, i + 2, &flags); > > > + ret = ax45mp_sbi_set_pma(start, size, flags, entry_id); > > > + if (!ret) > > > + pr_err("Failed to setup PMA region 0x%x - 0x%x flags: 0x%x", > > > + start, start + size, flags); > > > + } > > > + > > > + return 0; > > > +} > > > > If firmware support is required to set up these PMA regions, why is > > Linux doing this at all? The firmware has access to the devicetree as > > well. It can set this up before entering S-mode, and then you don't need > > to expose this capability via an SBI extension. In fact, firmware could > > generate the reserved-memory node based on these regions at runtime (or > > vice versa). > > > That's a good point. I'll do some research on this and get back. > > Btw are there any existing examples where the firmware adds DT nodes? /memory, reserved-memory, optee on ARM, RPC status on R-Car Gen3/4, ... Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
Hi Geert, On Sun, Nov 27, 2022 at 9:55 AM Geert Uytterhoeven <geert@linux-m68k.org> wrote: > > Hi Prabhakar, > > On Sat, Nov 26, 2022 at 10:10 PM Lad, Prabhakar > <prabhakar.csengg@gmail.com> wrote: > > On Fri, Nov 25, 2022 at 7:43 PM Samuel Holland <samuel@sholland.org> wrote: > > > On 11/24/22 11:22, Prabhakar wrote: > > > > From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > > > > > > > > On the AX45MP core, cache coherency is a specification option so it may > > > > not be supported. In this case DMA will fail. As a workaround, firstly we > > > > allocate a global dma coherent pool from which DMA allocations are taken > > > > and marked as non-cacheable + bufferable using the PMA region as specified > > > > in the device tree. Synchronization callbacks are implemented to > > > > synchronize when doing DMA transactions. > > > > > > > > The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA) > > > > block that allows dynamic adjustment of memory attributes in the runtime. > > > > It contains a configurable amount of PMA entries implemented as CSR > > > > registers to control the attributes of memory locations in interest. > > > > > > > > Below are the memory attributes supported: > > > > * Device, Non-bufferable > > > > * Device, bufferable > > > > * Memory, Non-cacheable, Non-bufferable > > > > * Memory, Non-cacheable, Bufferable > > > > * Memory, Write-back, No-allocate > > > > * Memory, Write-back, Read-allocate > > > > * Memory, Write-back, Write-allocate > > > > * Memory, Write-back, Read and Write-allocate > > > > > > > > This patch adds support to configure the memory attributes of the memory > > > > regions as passed from the l2 cache node and exposes the cache management > > > > ops. > > > > > > Forgive my ignorance, but why do you need both a DMA pool and explicit > > > cache maintenance? Wouldn't the purpose of marking a memory region as > > > permanently non-cacheable be to avoid cache maintenance? And likewise, > > > if you are doing cache maintenance anyway, why does it matter if/how the > > > memory is cacheable? > > > > > "Memory, Non-cacheable, Bufferable" raises an AXI signal for > > transactions hence needing SW implementation for cache maintenance. > > > > > > More info about PMA (section 10.3): > > > > Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf > > > > > > > > Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > > > > > +static int ax45mp_configure_pma_regions(struct device_node *np) > > > > +{ > > > > + const char *propname = "andestech,pma-regions"; > > > > + u32 start, size, flags; > > > > + unsigned int entry_id; > > > > + unsigned int i; > > > > + int count; > > > > + int ret; > > > > + > > > > + count = of_property_count_elems_of_size(np, propname, sizeof(u32) * 3); > > > > + if (count < 0) > > > > + return count; > > > > + > > > > + if (count > AX45MP_MAX_PMA_REGIONS) > > > > + return -EINVAL; > > > > + > > > > + for (i = 0, entry_id = 0 ; entry_id < count ; i += 3, entry_id++) { > > > > + of_property_read_u32_index(np, propname, i, &start); > > > > + of_property_read_u32_index(np, propname, i + 1, &size); > > > > + of_property_read_u32_index(np, propname, i + 2, &flags); > > > > + ret = ax45mp_sbi_set_pma(start, size, flags, entry_id); > > > > + if (!ret) > > > > + pr_err("Failed to setup PMA region 0x%x - 0x%x flags: 0x%x", > > > > + start, start + size, flags); > > > > + } > > > > + > > > > + return 0; > > > > +} > > > > > > If firmware support is required to set up these PMA regions, why is > > > Linux doing this at all? The firmware has access to the devicetree as > > > well. It can set this up before entering S-mode, and then you don't need > > > to expose this capability via an SBI extension. In fact, firmware could > > > generate the reserved-memory node based on these regions at runtime (or > > > vice versa). > > > > > That's a good point. I'll do some research on this and get back. > > > > Btw are there any existing examples where the firmware adds DT nodes? > > /memory, reserved-memory, optee on ARM, RPC status on R-Car Gen3/4, ... > On the TF-A we pass the FDT blob to u-boot and this does the magic. On the RISC-V what would be the correct approach? - We setup the PMA regions in OpenSBI - We provide a vendor specific EXT to check if the PMA is setup - In u-boot ft_board_setup() callback add the reserved-memory node Does the above approach sound good or is there a better approach I'm missing? Cheers, Prabhakar
On 11/28/22 06:08, Lad, Prabhakar wrote: > Hi Geert, > > On Sun, Nov 27, 2022 at 9:55 AM Geert Uytterhoeven <geert@linux-m68k.org> wrote: >> >> Hi Prabhakar, >> >> On Sat, Nov 26, 2022 at 10:10 PM Lad, Prabhakar >> <prabhakar.csengg@gmail.com> wrote: >>> On Fri, Nov 25, 2022 at 7:43 PM Samuel Holland <samuel@sholland.org> wrote: >>>> On 11/24/22 11:22, Prabhakar wrote: >>>>> From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> >>>>> >>>>> On the AX45MP core, cache coherency is a specification option so it may >>>>> not be supported. In this case DMA will fail. As a workaround, firstly we >>>>> allocate a global dma coherent pool from which DMA allocations are taken >>>>> and marked as non-cacheable + bufferable using the PMA region as specified >>>>> in the device tree. Synchronization callbacks are implemented to >>>>> synchronize when doing DMA transactions. >>>>> >>>>> The Andes AX45MP core has a Programmable Physical Memory Attributes (PMA) >>>>> block that allows dynamic adjustment of memory attributes in the runtime. >>>>> It contains a configurable amount of PMA entries implemented as CSR >>>>> registers to control the attributes of memory locations in interest. >>>>> >>>>> Below are the memory attributes supported: >>>>> * Device, Non-bufferable >>>>> * Device, bufferable >>>>> * Memory, Non-cacheable, Non-bufferable >>>>> * Memory, Non-cacheable, Bufferable >>>>> * Memory, Write-back, No-allocate >>>>> * Memory, Write-back, Read-allocate >>>>> * Memory, Write-back, Write-allocate >>>>> * Memory, Write-back, Read and Write-allocate >>>>> >>>>> This patch adds support to configure the memory attributes of the memory >>>>> regions as passed from the l2 cache node and exposes the cache management >>>>> ops. >>>> >>>> Forgive my ignorance, but why do you need both a DMA pool and explicit >>>> cache maintenance? Wouldn't the purpose of marking a memory region as >>>> permanently non-cacheable be to avoid cache maintenance? And likewise, >>>> if you are doing cache maintenance anyway, why does it matter if/how the >>>> memory is cacheable? >>>> >>> "Memory, Non-cacheable, Bufferable" raises an AXI signal for >>> transactions hence needing SW implementation for cache maintenance. >>> >>>>> More info about PMA (section 10.3): >>>>> Link: http://www.andestech.com/wp-content/uploads/AX45MP-1C-Rev.-5.0.0-Datasheet.pdf >>>>> >>>>> Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> >> >>>>> +static int ax45mp_configure_pma_regions(struct device_node *np) >>>>> +{ >>>>> + const char *propname = "andestech,pma-regions"; >>>>> + u32 start, size, flags; >>>>> + unsigned int entry_id; >>>>> + unsigned int i; >>>>> + int count; >>>>> + int ret; >>>>> + >>>>> + count = of_property_count_elems_of_size(np, propname, sizeof(u32) * 3); >>>>> + if (count < 0) >>>>> + return count; >>>>> + >>>>> + if (count > AX45MP_MAX_PMA_REGIONS) >>>>> + return -EINVAL; >>>>> + >>>>> + for (i = 0, entry_id = 0 ; entry_id < count ; i += 3, entry_id++) { >>>>> + of_property_read_u32_index(np, propname, i, &start); >>>>> + of_property_read_u32_index(np, propname, i + 1, &size); >>>>> + of_property_read_u32_index(np, propname, i + 2, &flags); >>>>> + ret = ax45mp_sbi_set_pma(start, size, flags, entry_id); >>>>> + if (!ret) >>>>> + pr_err("Failed to setup PMA region 0x%x - 0x%x flags: 0x%x", >>>>> + start, start + size, flags); >>>>> + } >>>>> + >>>>> + return 0; >>>>> +} >>>> >>>> If firmware support is required to set up these PMA regions, why is >>>> Linux doing this at all? The firmware has access to the devicetree as >>>> well. It can set this up before entering S-mode, and then you don't need >>>> to expose this capability via an SBI extension. In fact, firmware could >>>> generate the reserved-memory node based on these regions at runtime (or >>>> vice versa). >>>> >>> That's a good point. I'll do some research on this and get back. >>> >>> Btw are there any existing examples where the firmware adds DT nodes? >> >> /memory, reserved-memory, optee on ARM, RPC status on R-Car Gen3/4, ... >> > On the TF-A we pass the FDT blob to u-boot and this does the magic. > > On the RISC-V what would be the correct approach? > - We setup the PMA regions in OpenSBI > - We provide a vendor specific EXT to check if the PMA is setup > - In u-boot ft_board_setup() callback add the reserved-memory node > > Does the above approach sound good or is there a better approach I'm missing? My suggestion was to fix up the DT in OpenSBI itself. See lib/utils/fdt/fdt_fixup.c in the OpenSBI source tree. There is also a platform hook for this. Then OpenSBI passes the FDT to U-Boot, and U-Boot passes it on to Linux. No SBI extension is needed in that case. If you optionally want your U-Boot to support loading a replacement FDT from disk, then ft_board_setup() would need to copy the reserved-memory nodes from U-Boot's control FDT to the loaded FDT. But this logic is the same for all reserved-memory nodes, including the one OpenSBI adds already. U-Boot has some code for this copying which you could reuse. Regards, Samuel
On 11/26/22 15:09, Lad, Prabhakar wrote: >>> + if (!ax45mp_priv->l2c_base) { >>> + ret = -ENOMEM; >>> + goto l2c_err; >>> + } >>> + >>> + ret = ax45mp_configure_l2_cache(np); >>> + if (ret) >>> + goto l2c_err; >>> + >>> + ret = ax45mp_configure_pma_regions(np); >>> + if (ret) >>> + goto l2c_err; >>> + >>> + static_branch_disable(&ax45mp_l2c_configured); >> >> Instead of enabling this before the probe function, and disabling it >> afterward, just enable it once here, in the success case. Then you can >> drop the !ax45mp_priv check in the functions above. >> > I think I had tried it but static_branch_unlikely() was always returning true. You use DEFINE_STATIC_KEY_FALSE above, so static_branch_unlikely() should return false until you call static_branch_enable(). >> And none of the functions would get called anyway if the alternative is >> not applied. I suppose it's not possible to do some of this probe logic >> in the alternative check function? >> > you mean to check in the vendor errata patch function to see if this > driver has probed? I meant to do the equivalent of: + ax45mp_priv->ucctl_ok = ax45mp_cpu_cache_controlable(); + ax45mp_priv->l2cache_enabled = ax45mp_cpu_l2c_ctl_status() & AX45MP_L2_CACHE_CTL_CEN_MASK; in the errata function, since that decides if the cache maintenance functions actually do anything. But ax45mp_cpu_l2c_ctl_status() gets the MMIO address from the DT, and trying to do that from the errata function could get ugly, so maybe it is not a good suggestion. Regards, Samuel
Hi Samuel, On Tue, Nov 29, 2022 at 5:58 AM Samuel Holland <samuel@sholland.org> wrote: > > On 11/26/22 15:09, Lad, Prabhakar wrote: > >>> + if (!ax45mp_priv->l2c_base) { > >>> + ret = -ENOMEM; > >>> + goto l2c_err; > >>> + } > >>> + > >>> + ret = ax45mp_configure_l2_cache(np); > >>> + if (ret) > >>> + goto l2c_err; > >>> + > >>> + ret = ax45mp_configure_pma_regions(np); > >>> + if (ret) > >>> + goto l2c_err; > >>> + > >>> + static_branch_disable(&ax45mp_l2c_configured); > >> > >> Instead of enabling this before the probe function, and disabling it > >> afterward, just enable it once here, in the success case. Then you can > >> drop the !ax45mp_priv check in the functions above. > >> > > I think I had tried it but static_branch_unlikely() was always returning true. > > You use DEFINE_STATIC_KEY_FALSE above, so static_branch_unlikely() > should return false until you call static_branch_enable(). > OK, got that. > >> And none of the functions would get called anyway if the alternative is > >> not applied. I suppose it's not possible to do some of this probe logic > >> in the alternative check function? > >> > > you mean to check in the vendor errata patch function to see if this > > driver has probed? > > I meant to do the equivalent of: > > + ax45mp_priv->ucctl_ok = ax45mp_cpu_cache_controlable(); > + ax45mp_priv->l2cache_enabled = ax45mp_cpu_l2c_ctl_status() & > AX45MP_L2_CACHE_CTL_CEN_MASK; > > in the errata function, since that decides if the cache maintenance > functions actually do anything. But ax45mp_cpu_l2c_ctl_status() gets the > MMIO address from the DT, and trying to do that from the errata function > could get ugly, so maybe it is not a good suggestion. > Actually I did think about this and the best approach is to do it in errata only as you suggested. So here's my approach for dropping the above checks is to introduce vendor specific SBI EXT (RZFIVE_SBI_EXT_IOCP_SW_WORKAROUND) which will check both the above conditions and only apply the errata on success and hence avoid the "if" checks every time in the sync operation. Cheers, Prabhakar
diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 4a04d1be7c67..3226f3aceafe 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -61,6 +61,14 @@ static inline void riscv_noncoherent_supported(void) {} #define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL #define SYS_RISCV_FLUSH_ICACHE_ALL (SYS_RISCV_FLUSH_ICACHE_LOCAL) +#ifdef CONFIG_AX45MP_L2_CACHE +extern asmlinkage void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr, + size_t size, int dir, int ops); +#else +inline void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr, + size_t size, int dir, int ops) {} +#endif + #include <asm-generic/cacheflush.h> #endif /* _ASM_RISCV_CACHEFLUSH_H */ diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 48e899a8e7a9..300fed3bfd80 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -125,8 +125,8 @@ asm volatile(ALTERNATIVE( \ #define THEAD_SYNC_S ".long 0x0190000b" #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops) \ -asm volatile(ALTERNATIVE_2( \ - __nops(6), \ +asm volatile(ALTERNATIVE_3( \ + __nops(14), \ "mv a0, %1\n\t" \ "j 2f\n\t" \ "3:\n\t" \ @@ -134,7 +134,7 @@ asm volatile(ALTERNATIVE_2( \ "add a0, a0, %0\n\t" \ "2:\n\t" \ "bltu a0, %2, 3b\n\t" \ - "nop", 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ + __nops(8), 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ "mv a0, %1\n\t" \ "j 2f\n\t" \ "3:\n\t" \ @@ -142,8 +142,28 @@ asm volatile(ALTERNATIVE_2( \ "add a0, a0, %0\n\t" \ "2:\n\t" \ "bltu a0, %2, 3b\n\t" \ - THEAD_SYNC_S, THEAD_VENDOR_ID, \ - ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO) \ + THEAD_SYNC_S "\n\t" \ + __nops(8), THEAD_VENDOR_ID, \ + ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO, \ + ".option push\n\t\n\t" \ + ".option norvc\n\t" \ + ".option norelax\n\t" \ + "addi sp,sp,-16\n\t" \ + "sd s0,0(sp)\n\t" \ + "sd ra,8(sp)\n\t" \ + "addi s0,sp,16\n\t" \ + "mv a4,%6\n\t" \ + "mv a3,%5\n\t" \ + "mv a2,%4\n\t" \ + "mv a1,%3\n\t" \ + "mv a0,%0\n\t" \ + "call ax45mp_no_iocp_cmo\n\t" \ + "ld ra,8(sp)\n\t" \ + "ld s0,0(sp)\n\t" \ + "addi sp,sp,16\n\t" \ + ".option pop\n\t", \ + ANDESTECH_VENDOR_ID, ERRATA_ANDESTECH_NO_IOCP, \ + CONFIG_ERRATA_ANDES_CMO) \ : : "r"(_cachesize), \ "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ "r"((unsigned long)(_start) + (_size)), \ @@ -151,7 +171,7 @@ asm volatile(ALTERNATIVE_2( \ "r"((unsigned long)(_size)), \ "r"((unsigned long)(_dir)), \ "r"((unsigned long)(_ops)) \ - : "a0") + : "a0", "a1", "a2", "a3", "a4", "memory") #define THEAD_C9XX_RV_IRQ_PMU 17 #define THEAD_C9XX_CSR_SCOUNTEROF 0x5c5 diff --git a/drivers/soc/renesas/Kconfig b/drivers/soc/renesas/Kconfig index 660498252ec5..e7810256c60d 100644 --- a/drivers/soc/renesas/Kconfig +++ b/drivers/soc/renesas/Kconfig @@ -340,9 +340,16 @@ if RISCV config ARCH_R9A07G043 bool "RISC-V Platform support for RZ/Five" select ARCH_RZG2L + select AX45MP_L2_CACHE + select DMA_GLOBAL_POOL + select ERRATA_ANDES + select ERRATA_ANDES_CMO + select RISCV_DMA_NONCOHERENT help This enables support for the Renesas RZ/Five SoC. +source "drivers/soc/renesas/rzfive/Kconfig" + endif # RISCV config RST_RCAR diff --git a/drivers/soc/renesas/Makefile b/drivers/soc/renesas/Makefile index 535868c9c7e4..9df9f759a039 100644 --- a/drivers/soc/renesas/Makefile +++ b/drivers/soc/renesas/Makefile @@ -31,6 +31,8 @@ ifdef CONFIG_SMP obj-$(CONFIG_ARCH_R9A06G032) += r9a06g032-smp.o endif +obj-$(CONFIG_RISCV) += rzfive/ + # Family obj-$(CONFIG_RST_RCAR) += rcar-rst.o obj-$(CONFIG_SYSC_RCAR) += rcar-sysc.o diff --git a/drivers/soc/renesas/rzfive/Kconfig b/drivers/soc/renesas/rzfive/Kconfig new file mode 100644 index 000000000000..b6bc00337d99 --- /dev/null +++ b/drivers/soc/renesas/rzfive/Kconfig @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 + +config AX45MP_L2_CACHE + bool "Andes Technology AX45MP L2 Cache controller" + help + Support for the L2 cache controller on Andes Technology AX45MP platforms. diff --git a/drivers/soc/renesas/rzfive/Makefile b/drivers/soc/renesas/rzfive/Makefile new file mode 100644 index 000000000000..2012e7fb978d --- /dev/null +++ b/drivers/soc/renesas/rzfive/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_AX45MP_L2_CACHE) += ax45mp_cache.o diff --git a/drivers/soc/renesas/rzfive/ax45mp_cache.c b/drivers/soc/renesas/rzfive/ax45mp_cache.c new file mode 100644 index 000000000000..4e0d0545d3af --- /dev/null +++ b/drivers/soc/renesas/rzfive/ax45mp_cache.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PMA setup and non-coherent cache functions for Andes AX45MP + * + * Copyright (C) 2022 Renesas Electronics Corp. + */ + +#include <linux/cacheflush.h> +#include <linux/cacheinfo.h> +#include <linux/dma-direction.h> +#include <linux/of_address.h> +#include <linux/of_platform.h> + +#include <asm/cacheflush.h> +#include <asm/sbi.h> + +#include "ax45mp_sbi.h" + +/* L2 cache registers */ +#define AX45MP_L2C_REG_CTL_OFFSET 0x8 + +#define AX45MP_L2C_REG_C0_CMD_OFFSET 0x40 +#define AX45MP_L2C_REG_C0_ACC_OFFSET 0x48 +#define AX45MP_L2C_REG_STATUS_OFFSET 0x80 + +/* D-cache operation */ +#define AX45MP_CCTL_L1D_VA_INVAL 0 +#define AX45MP_CCTL_L1D_VA_WB 1 + +/* L2 cache */ +#define AX45MP_L2_CACHE_CTL_CEN_MASK 1 + +/* L2 CCTL status */ +#define AX45MP_CCTL_L2_STATUS_IDLE 0 + +/* L2 CCTL status cores mask */ +#define AX45MP_CCTL_L2_STATUS_C0_MASK 0xf + +/* L2 cache operation */ +#define AX45MP_CCTL_L2_PA_INVAL 0x8 +#define AX45MP_CCTL_L2_PA_WB 0x9 + +#define AX45MP_L2C_HPM_PER_CORE_OFFSET 0x8 +#define AX45MP_L2C_REG_PER_CORE_OFFSET 0x10 +#define AX45MP_CCTL_L2_STATUS_PER_CORE_OFFSET 4 + +#define AX45MP_L2C_REG_CN_CMD_OFFSET(n) \ + (AX45MP_L2C_REG_C0_CMD_OFFSET + ((n) * AX45MP_L2C_REG_PER_CORE_OFFSET)) +#define AX45MP_L2C_REG_CN_ACC_OFFSET(n) \ + (AX45MP_L2C_REG_C0_ACC_OFFSET + ((n) * AX45MP_L2C_REG_PER_CORE_OFFSET)) +#define AX45MP_CCTL_L2_STATUS_CN_MASK(n) \ + (AX45MP_CCTL_L2_STATUS_C0_MASK << ((n) * AX45MP_CCTL_L2_STATUS_PER_CORE_OFFSET)) + +#define AX45MP_MICM_CFG_ISZ_OFFSET 6 +#define AX45MP_MICM_CFG_ISZ_MASK (0x7 << AX45MP_MICM_CFG_ISZ_OFFSET) + +#define AX45MP_MDCM_CFG_DSZ_OFFSET 6 +#define AX45MP_MDCM_CFG_DSZ_MASK (0x7 << AX45MP_MDCM_CFG_DSZ_OFFSET) + +#define AX45MP_CCTL_REG_UCCTLBEGINADDR_NUM 0x80b +#define AX45MP_CCTL_REG_UCCTLCOMMAND_NUM 0x80c + +#define AX45MP_MCACHE_CTL_CCTL_SUEN_OFFSET 8 +#define AX45MP_MMSC_CFG_CCTLCSR_OFFSET 16 +#define AX45MP_MISA_20_OFFSET 20 + +#define AX45MP_MCACHE_CTL_CCTL_SUEN_MASK (0x1 << AX45MP_MCACHE_CTL_CCTL_SUEN_OFFSET) +#define AX45MP_MMSC_CFG_CCTLCSR_MASK (0x1 << AX45MP_MMSC_CFG_CCTLCSR_OFFSET) +#define AX45MP_MISA_20_MASK (0x1 << AX45MP_MISA_20_OFFSET) + +#define AX45MP_MAX_CACHE_LINE_SIZE 256 + +#define AX45MP_MAX_PMA_REGIONS 16 + +struct ax45mp_priv { + void __iomem *l2c_base; + u32 ax45mp_cache_line_size; + bool l2cache_enabled; + bool ucctl_ok; +}; + +static struct ax45mp_priv *ax45mp_priv; +static DEFINE_STATIC_KEY_FALSE(ax45mp_l2c_configured); + +/* PMA setup */ +static long ax45mp_sbi_set_pma(unsigned long start, + unsigned long size, + unsigned long flags, + unsigned int entry_id) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_ANDES, AX45MP_SBI_EXT_SET_PMA, + start, size, entry_id, flags, 0, 0); + + return ret.value; +} + +static int ax45mp_configure_pma_regions(struct device_node *np) +{ + const char *propname = "andestech,pma-regions"; + u32 start, size, flags; + unsigned int entry_id; + unsigned int i; + int count; + int ret; + + count = of_property_count_elems_of_size(np, propname, sizeof(u32) * 3); + if (count < 0) + return count; + + if (count > AX45MP_MAX_PMA_REGIONS) + return -EINVAL; + + for (i = 0, entry_id = 0 ; entry_id < count ; i += 3, entry_id++) { + of_property_read_u32_index(np, propname, i, &start); + of_property_read_u32_index(np, propname, i + 1, &size); + of_property_read_u32_index(np, propname, i + 2, &flags); + ret = ax45mp_sbi_set_pma(start, size, flags, entry_id); + if (!ret) + pr_err("Failed to setup PMA region 0x%x - 0x%x flags: 0x%x", + start, start + size, flags); + } + + return 0; +} + +/* L2 Cache operations */ +static uint32_t ax45mp_cpu_get_mcache_ctl_status(void) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_ANDES, AX45MP_SBI_EXT_GET_MCACHE_CTL_STATUS, + 0, 0, 0, 0, 0, 0); + return ret.value; +} + +static uint32_t ax45mp_cpu_get_micm_cfg_status(void) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_ANDES, AX45MP_SBI_EXT_GET_MICM_CTL_STATUS, + 0, 0, 0, 0, 0, 0); + return ret.value; +} + +static uint32_t ax45mp_cpu_get_mdcm_cfg_status(void) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_ANDES, AX45MP_SBI_EXT_GET_MDCM_CTL_STATUS, + 0, 0, 0, 0, 0, 0); + return ret.value; +} + +static uint32_t ax45mp_cpu_get_mmsc_cfg_status(void) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_ANDES, AX45MP_SBI_EXT_GET_MMSC_CTL_STATUS, + 0, 0, 0, 0, 0, 0); + return ret.value; +} + +static uint32_t ax45mp_cpu_get_misa_cfg_status(void) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_ANDES, AX45MP_SBI_EXT_GET_MISA_CTL_STATUS, + 0, 0, 0, 0, 0, 0); + return ret.value; +} + +static inline uint32_t ax45mp_cpu_l2c_get_cctl_status(void) +{ + return readl(ax45mp_priv->l2c_base + AX45MP_L2C_REG_STATUS_OFFSET); +} + +static inline uint32_t ax45mp_cpu_l2c_ctl_status(void) +{ + return readl(ax45mp_priv->l2c_base + AX45MP_L2C_REG_CTL_OFFSET); +} + +static bool ax45mp_cpu_cache_controlable(void) +{ + return (((ax45mp_cpu_get_micm_cfg_status() & AX45MP_MICM_CFG_ISZ_MASK) || + (ax45mp_cpu_get_mdcm_cfg_status() & AX45MP_MDCM_CFG_DSZ_MASK)) && + (ax45mp_cpu_get_misa_cfg_status() & AX45MP_MISA_20_MASK) && + (ax45mp_cpu_get_mmsc_cfg_status() & AX45MP_MMSC_CFG_CCTLCSR_MASK) && + (ax45mp_cpu_get_mcache_ctl_status() & AX45MP_MCACHE_CTL_CCTL_SUEN_MASK)); +} + +static void ax45mp_cpu_dcache_wb_range(void *start, void *end, int line_size) +{ + void __iomem *base = ax45mp_priv->l2c_base; + unsigned long pa; + int mhartid = 0; +#ifdef CONFIG_SMP + mhartid = smp_processor_id(); +#endif + + while (end > start) { + if (ax45mp_priv->ucctl_ok) { + csr_write(AX45MP_CCTL_REG_UCCTLBEGINADDR_NUM, start); + csr_write(AX45MP_CCTL_REG_UCCTLCOMMAND_NUM, AX45MP_CCTL_L1D_VA_WB); + } + + if (ax45mp_priv->l2cache_enabled) { + pa = virt_to_phys(start); + writel(pa, base + AX45MP_L2C_REG_CN_ACC_OFFSET(mhartid)); + writel(AX45MP_CCTL_L2_PA_WB, + base + AX45MP_L2C_REG_CN_CMD_OFFSET(mhartid)); + while ((ax45mp_cpu_l2c_get_cctl_status() & + AX45MP_CCTL_L2_STATUS_CN_MASK(mhartid)) != + AX45MP_CCTL_L2_STATUS_IDLE) + ; + } + + start += line_size; + } +} + +static void ax45mp_cpu_dcache_inval_range(void *start, void *end, int line_size) +{ + void __iomem *base = ax45mp_priv->l2c_base; + unsigned long pa; + int mhartid = 0; +#ifdef CONFIG_SMP + mhartid = smp_processor_id(); +#endif + + while (end > start) { + if (ax45mp_priv->ucctl_ok) { + csr_write(AX45MP_CCTL_REG_UCCTLBEGINADDR_NUM, start); + csr_write(AX45MP_CCTL_REG_UCCTLCOMMAND_NUM, AX45MP_CCTL_L1D_VA_INVAL); + } + + if (ax45mp_priv->l2cache_enabled) { + pa = virt_to_phys(start); + writel(pa, base + AX45MP_L2C_REG_CN_ACC_OFFSET(mhartid)); + writel(AX45MP_CCTL_L2_PA_INVAL, + base + AX45MP_L2C_REG_CN_CMD_OFFSET(mhartid)); + while ((ax45mp_cpu_l2c_get_cctl_status() & + AX45MP_CCTL_L2_STATUS_CN_MASK(mhartid)) != + AX45MP_CCTL_L2_STATUS_IDLE) + ; + } + + start += line_size; + } +} + +static void ax45mp_cpu_dma_inval_range(void *vaddr, size_t size) +{ + char cache_buf[2][AX45MP_MAX_CACHE_LINE_SIZE]; + unsigned long start = (unsigned long)vaddr; + unsigned long end = start + size; + unsigned long old_start = start; + unsigned long old_end = end; + unsigned long line_size; + unsigned long flags; + + if (static_branch_unlikely(&ax45mp_l2c_configured) && !ax45mp_priv) + return; + + if (unlikely(start == end)) + return; + + line_size = ax45mp_priv->ax45mp_cache_line_size; + + memset(&cache_buf, 0x0, sizeof(cache_buf)); + start = start & (~(line_size - 1)); + end = ((end + line_size - 1) & (~(line_size - 1))); + + local_irq_save(flags); + if (unlikely(start != old_start)) + memcpy(&cache_buf[0][0], (void *)start, line_size); + + if (unlikely(end != old_end)) + memcpy(&cache_buf[1][0], (void *)(old_end & (~(line_size - 1))), line_size); + + ax45mp_cpu_dcache_inval_range(vaddr, (void *)end, line_size); + + if (unlikely(start != old_start)) + memcpy((void *)start, &cache_buf[0][0], (old_start & (line_size - 1))); + + if (unlikely(end != old_end)) + memcpy((void *)(old_end + 1), + &cache_buf[1][(old_end & (line_size - 1)) + 1], + end - old_end - 1); + + local_irq_restore(flags); +} + +static void ax45mp_cpu_dma_wb_range(void *vaddr, size_t size) +{ + unsigned long start = (unsigned long)vaddr; + unsigned long end = start + size; + unsigned long line_size; + unsigned long flags; + + if (static_branch_unlikely(&ax45mp_l2c_configured) && !ax45mp_priv) + return; + + line_size = ax45mp_priv->ax45mp_cache_line_size; + local_irq_save(flags); + start = start & (~(line_size - 1)); + ax45mp_cpu_dcache_wb_range(vaddr, (void *)end, line_size); + local_irq_restore(flags); +} + +void ax45mp_no_iocp_cmo(unsigned int cache_size, void *vaddr, size_t size, int dir, int ops) +{ + if (ops == NON_COHERENT_DMA_PREP) + return; + + if (ops == NON_COHERENT_SYNC_DMA_FOR_DEVICE) { + switch (dir) { + case DMA_FROM_DEVICE: + ax45mp_cpu_dma_inval_range(vaddr, size); + break; + case DMA_TO_DEVICE: + case DMA_BIDIRECTIONAL: + ax45mp_cpu_dma_wb_range(vaddr, size); + break; + default: + break; + } + return; + } + + /* op == NON_COHERENT_SYNC_DMA_FOR_CPU */ + if (dir == DMA_BIDIRECTIONAL || dir == DMA_FROM_DEVICE) + ax45mp_cpu_dma_inval_range(vaddr, size); +} +EXPORT_SYMBOL(ax45mp_no_iocp_cmo); + +static int ax45mp_configure_l2_cache(struct device_node *np) +{ + int ret; + + ret = of_property_read_u32(np, "cache-line-size", &ax45mp_priv->ax45mp_cache_line_size); + if (ret) { + pr_err("Failed to get cache-line-size defaulting to 64 bytes\n"); + ax45mp_priv->ax45mp_cache_line_size = SZ_64; + } + + if (ax45mp_priv->ax45mp_cache_line_size != SZ_64) { + pr_err("Expected cache-line-size to 64 bytes (found:%u). Defaulting to 64 bytes\n", + ax45mp_priv->ax45mp_cache_line_size); + ax45mp_priv->ax45mp_cache_line_size = SZ_64; + } + + ax45mp_priv->ucctl_ok = ax45mp_cpu_cache_controlable(); + ax45mp_priv->l2cache_enabled = ax45mp_cpu_l2c_ctl_status() & AX45MP_L2_CACHE_CTL_CEN_MASK; + + return 0; +} + +static int ax45mp_l2c_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + int ret; + + ax45mp_priv = devm_kzalloc(&pdev->dev, sizeof(*ax45mp_priv), GFP_KERNEL); + if (!ax45mp_priv) + return -ENOMEM; + + ax45mp_priv->l2c_base = devm_of_iomap(&pdev->dev, pdev->dev.of_node, 0, NULL); + if (!ax45mp_priv->l2c_base) { + ret = -ENOMEM; + goto l2c_err; + } + + ret = ax45mp_configure_l2_cache(np); + if (ret) + goto l2c_err; + + ret = ax45mp_configure_pma_regions(np); + if (ret) + goto l2c_err; + + static_branch_disable(&ax45mp_l2c_configured); + + return 0; + +l2c_err: + devm_kfree(&pdev->dev, ax45mp_priv); + ax45mp_priv = NULL; + return ret; +} + +static const struct of_device_id ax45mp_cache_ids[] = { + { .compatible = "andestech,ax45mp-cache" }, + { /* sentinel */ } +}; + +static struct platform_driver ax45mp_l2c_driver = { + .driver = { + .name = "ax45mp-l2c", + .of_match_table = ax45mp_cache_ids, + }, + .probe = ax45mp_l2c_probe, +}; + +static int __init ax45mp_cache_init(void) +{ + static_branch_enable(&ax45mp_l2c_configured); + return platform_driver_register(&ax45mp_l2c_driver); +} +arch_initcall(ax45mp_cache_init); + +MODULE_AUTHOR("Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>"); +MODULE_DESCRIPTION("Andes AX45MP L2 cache driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/soc/renesas/rzfive/ax45mp_sbi.h b/drivers/soc/renesas/rzfive/ax45mp_sbi.h new file mode 100644 index 000000000000..1604874954d0 --- /dev/null +++ b/drivers/soc/renesas/rzfive/ax45mp_sbi.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef __AX45MP_SBI_H +#define __AX45MP_SBI_H + +#define SBI_EXT_ANDES 0x0900031E + +enum ax45mp_sbi_ext_fid { + AX45MP_SBI_EXT_GET_MCACHE_CTL_STATUS = 0, + AX45MP_SBI_EXT_GET_MMISC_CTL_STATUS, + AX45MP_SBI_EXT_SET_MCACHE_CTL, + AX45MP_SBI_EXT_SET_MMISC_CTL, + AX45MP_SBI_EXT_ICACHE_OP, + AX45MP_SBI_EXT_DCACHE_OP, + AX45MP_SBI_EXT_L1CACHE_I_PREFETCH, + AX45MP_SBI_EXT_L1CACHE_D_PREFETCH, + AX45MP_SBI_EXT_NON_BLOCKING_LOAD_STORE, + AX45MP_SBI_EXT_WRITE_AROUND, + AX45MP_SBI_EXT_SET_PMA, + AX45MP_SBI_EXT_FREE_PMA, + AX45MP_SBI_EXT_PROBE_PMA, + AX45MP_SBI_EXT_DCACHE_WBINVAL_ALL, + AX45MP_SBI_EXT_GET_MICM_CTL_STATUS, + AX45MP_SBI_EXT_GET_MDCM_CTL_STATUS, + AX45MP_SBI_EXT_GET_MMSC_CTL_STATUS, + AX45MP_SBI_EXT_GET_MISA_CTL_STATUS, +}; + +#endif