diff mbox series

[v7,4/4] riscv: implement cache-management errata for T-Head SoCs

Message ID 20220706231536.2041855-5-heiko@sntech.de (mailing list archive)
State New, archived
Headers show
Series riscv: implement Zicbom-based CMO instructions + the t-head variant | expand

Commit Message

Heiko Stuebner July 6, 2022, 11:15 p.m. UTC
The T-Head C906 and C910 implement a scheme for handling
cache operations different from the generic Zicbom extension.

Add an errata for it next to the generic dma coherency ops.

Reviewed-by: Samuel Holland <samuel@sholland.org>
Tested-by: Samuel Holland <samuel@sholland.org>
Reviewed-by: Guo Ren <guoren@kernel.org>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 arch/riscv/Kconfig.erratas           | 11 +++++++
 arch/riscv/errata/thead/errata.c     | 20 ++++++++++++
 arch/riscv/include/asm/errata_list.h | 48 +++++++++++++++++++++++++---
 3 files changed, 74 insertions(+), 5 deletions(-)

Comments

Palmer Dabbelt Aug. 4, 2022, 12:28 a.m. UTC | #1
On Wed, 06 Jul 2022 16:15:36 PDT (-0700), heiko@sntech.de wrote:
> The T-Head C906 and C910 implement a scheme for handling
> cache operations different from the generic Zicbom extension.
>
> Add an errata for it next to the generic dma coherency ops.
>
> Reviewed-by: Samuel Holland <samuel@sholland.org>
> Tested-by: Samuel Holland <samuel@sholland.org>
> Reviewed-by: Guo Ren <guoren@kernel.org>
> Signed-off-by: Heiko Stuebner <heiko@sntech.de>
> ---
>  arch/riscv/Kconfig.erratas           | 11 +++++++
>  arch/riscv/errata/thead/errata.c     | 20 ++++++++++++
>  arch/riscv/include/asm/errata_list.h | 48 +++++++++++++++++++++++++---
>  3 files changed, 74 insertions(+), 5 deletions(-)
>
> diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas
> index 457ac72c9b36..3223e533fd87 100644
> --- a/arch/riscv/Kconfig.erratas
> +++ b/arch/riscv/Kconfig.erratas
> @@ -55,4 +55,15 @@ config ERRATA_THEAD_PBMT
>
>  	  If you don't know what to do here, say "Y".
>
> +config ERRATA_THEAD_CMO
> +	bool "Apply T-Head cache management errata"
> +	depends on ERRATA_THEAD
> +	select RISCV_DMA_NONCOHERENT
> +	default y
> +	help
> +	  This will apply the cache management errata to handle the
> +	  non-standard handling on non-coherent operations on T-Head SoCs.
> +
> +	  If you don't know what to do here, say "Y".
> +
>  endmenu
> diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
> index b37b6fedd53b..202c83f677b2 100644
> --- a/arch/riscv/errata/thead/errata.c
> +++ b/arch/riscv/errata/thead/errata.c
> @@ -27,6 +27,23 @@ static bool errata_probe_pbmt(unsigned int stage,
>  	return false;
>  }
>
> +static bool errata_probe_cmo(unsigned int stage,
> +			     unsigned long arch_id, unsigned long impid)
> +{
> +#ifdef CONFIG_ERRATA_THEAD_CMO
> +	if (arch_id != 0 || impid != 0)
> +		return false;
> +
> +	if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
> +		return false;
> +
> +	riscv_noncoherent_supported();
> +	return true;
> +#else
> +	return false;
> +#endif
> +}
> +
>  static u32 thead_errata_probe(unsigned int stage,
>  			      unsigned long archid, unsigned long impid)
>  {
> @@ -35,6 +52,9 @@ static u32 thead_errata_probe(unsigned int stage,
>  	if (errata_probe_pbmt(stage, archid, impid))
>  		cpu_req_errata |= (1U << ERRATA_THEAD_PBMT);
>
> +	if (errata_probe_cmo(stage, archid, impid))
> +		cpu_req_errata |= (1U << ERRATA_THEAD_CMO);
> +
>  	return cpu_req_errata;
>  }
>
> diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> index 79d89aeeaa6c..19a771085781 100644
> --- a/arch/riscv/include/asm/errata_list.h
> +++ b/arch/riscv/include/asm/errata_list.h
> @@ -16,7 +16,8 @@
>
>  #ifdef CONFIG_ERRATA_THEAD
>  #define	ERRATA_THEAD_PBMT 0
> -#define	ERRATA_THEAD_NUMBER 1
> +#define	ERRATA_THEAD_CMO 1
> +#define	ERRATA_THEAD_NUMBER 2
>  #endif
>
>  #define	CPUFEATURE_SVPBMT 0
> @@ -88,17 +89,54 @@ asm volatile(ALTERNATIVE(						\
>  #define ALT_THEAD_PMA(_val)
>  #endif
>
> +/*
> + * dcache.ipa rs1 (invalidate, physical address)
> + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> + *   0000001    01010      rs1       000      00000  0001011
> + * dache.iva rs1 (invalida, virtual address)
> + *   0000001    00110      rs1       000      00000  0001011
> + *
> + * dcache.cpa rs1 (clean, physical address)
> + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> + *   0000001    01001      rs1       000      00000  0001011
> + * dcache.cva rs1 (clean, virtual address)
> + *   0000001    00100      rs1       000      00000  0001011
> + *
> + * dcache.cipa rs1 (clean then invalidate, physical address)
> + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> + *   0000001    01011      rs1       000      00000  0001011
> + * dcache.civa rs1 (... virtual address)
> + *   0000001    00111      rs1       000      00000  0001011
> + *
> + * sync.s (make sure all cache operations finished)
> + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> + *   0000000    11001     00000      000      00000  0001011
> + */
> +#define THEAD_inval_A0	".long 0x0265000b"
> +#define THEAD_clean_A0	".long 0x0245000b"
> +#define THEAD_flush_A0	".long 0x0275000b"
> +#define THEAD_SYNC_S	".long 0x0190000b"

I'm not sure what to do with these: I really don't want to have a bunch 
of raw binary instruction encodings floating around, but it looks like 
the T-Head folks want to re-write their ISA manual before merging the 
GAS support for it which means we'd be stuck going another release cycle 
(and presumably another year of LTS) before getting the hardware 
supported.  It really seems like we're just going in circles here trying 
to get everything lined up, and it's getting silly blocking real 
hardware from working because of a little bit of ugliness.

I know I said I really don't want the executable .long stuff for this, 
and IIRC that's a pretty common sentiment.  I'm not sure if I'm just fed 
up with all the craziness, but I'm kind of inclined to just merge this 
as-is -- at least that way we can get the hardware working.  In the long 
run I think we're going to end up with some much uglier errata, so I 
doubt we'll be all that worried about this one later.

That said, I'll give folks some time to chime in as IIRC this has been 
pointed out a handful of times.

> +
>  #define ALT_CMO_OP(_op, _start, _size, _cachesize)			\
> -asm volatile(ALTERNATIVE(						\
> -	__nops(5),							\
> +asm volatile(ALTERNATIVE_2(						\
> +	__nops(6),							\
>  	"mv a0, %1\n\t"							\
>  	"j 2f\n\t"							\
>  	"3:\n\t"							\
>  	"cbo." __stringify(_op) " (a0)\n\t"				\
>  	"add a0, a0, %0\n\t"						\
>  	"2:\n\t"							\
> -	"bltu a0, %2, 3b\n\t", 0,					\
> -		CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM)		\
> +	"bltu a0, %2, 3b\n\t"						\
> +	"nop", 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM,		\
> +	"mv a0, %1\n\t"							\
> +	"j 2f\n\t"							\
> +	"3:\n\t"							\
> +	THEAD_##_op##_A0 "\n\t"						\
> +	"add a0, a0, %0\n\t"						\
> +	"2:\n\t"							\
> +	"bltu a0, %2, 3b\n\t"						\
> +	THEAD_SYNC_S, THEAD_VENDOR_ID,					\
> +			ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO)	\
>  	: : "r"(_cachesize),						\
>  	    "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)),	\
>  	    "r"((unsigned long)(_start) + (_size))			\
Arnd Bergmann Aug. 4, 2022, 8:16 a.m. UTC | #2
On Thu, Aug 4, 2022 at 2:28 AM Palmer Dabbelt <palmer@dabbelt.com> wrote:

> I know I said I really don't want the executable .long stuff for this,
> and IIRC that's a pretty common sentiment.  I'm not sure if I'm just fed
> up with all the craziness, but I'm kind of inclined to just merge this
> as-is -- at least that way we can get the hardware working.

There is usually not much choice here if you want to allow building
with older toolchains. You might want to add a comment for each one
of those to reference the (projected) binutils version that adds support
so it can be cleaned up after you raise the minimum toolchain
requirements, but that takes years.

        Arnd
diff mbox series

Patch

diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas
index 457ac72c9b36..3223e533fd87 100644
--- a/arch/riscv/Kconfig.erratas
+++ b/arch/riscv/Kconfig.erratas
@@ -55,4 +55,15 @@  config ERRATA_THEAD_PBMT
 
 	  If you don't know what to do here, say "Y".
 
+config ERRATA_THEAD_CMO
+	bool "Apply T-Head cache management errata"
+	depends on ERRATA_THEAD
+	select RISCV_DMA_NONCOHERENT
+	default y
+	help
+	  This will apply the cache management errata to handle the
+	  non-standard handling on non-coherent operations on T-Head SoCs.
+
+	  If you don't know what to do here, say "Y".
+
 endmenu
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
index b37b6fedd53b..202c83f677b2 100644
--- a/arch/riscv/errata/thead/errata.c
+++ b/arch/riscv/errata/thead/errata.c
@@ -27,6 +27,23 @@  static bool errata_probe_pbmt(unsigned int stage,
 	return false;
 }
 
+static bool errata_probe_cmo(unsigned int stage,
+			     unsigned long arch_id, unsigned long impid)
+{
+#ifdef CONFIG_ERRATA_THEAD_CMO
+	if (arch_id != 0 || impid != 0)
+		return false;
+
+	if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
+		return false;
+
+	riscv_noncoherent_supported();
+	return true;
+#else
+	return false;
+#endif
+}
+
 static u32 thead_errata_probe(unsigned int stage,
 			      unsigned long archid, unsigned long impid)
 {
@@ -35,6 +52,9 @@  static u32 thead_errata_probe(unsigned int stage,
 	if (errata_probe_pbmt(stage, archid, impid))
 		cpu_req_errata |= (1U << ERRATA_THEAD_PBMT);
 
+	if (errata_probe_cmo(stage, archid, impid))
+		cpu_req_errata |= (1U << ERRATA_THEAD_CMO);
+
 	return cpu_req_errata;
 }
 
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index 79d89aeeaa6c..19a771085781 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -16,7 +16,8 @@ 
 
 #ifdef CONFIG_ERRATA_THEAD
 #define	ERRATA_THEAD_PBMT 0
-#define	ERRATA_THEAD_NUMBER 1
+#define	ERRATA_THEAD_CMO 1
+#define	ERRATA_THEAD_NUMBER 2
 #endif
 
 #define	CPUFEATURE_SVPBMT 0
@@ -88,17 +89,54 @@  asm volatile(ALTERNATIVE(						\
 #define ALT_THEAD_PMA(_val)
 #endif
 
+/*
+ * dcache.ipa rs1 (invalidate, physical address)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ *   0000001    01010      rs1       000      00000  0001011
+ * dache.iva rs1 (invalida, virtual address)
+ *   0000001    00110      rs1       000      00000  0001011
+ *
+ * dcache.cpa rs1 (clean, physical address)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ *   0000001    01001      rs1       000      00000  0001011
+ * dcache.cva rs1 (clean, virtual address)
+ *   0000001    00100      rs1       000      00000  0001011
+ *
+ * dcache.cipa rs1 (clean then invalidate, physical address)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ *   0000001    01011      rs1       000      00000  0001011
+ * dcache.civa rs1 (... virtual address)
+ *   0000001    00111      rs1       000      00000  0001011
+ *
+ * sync.s (make sure all cache operations finished)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ *   0000000    11001     00000      000      00000  0001011
+ */
+#define THEAD_inval_A0	".long 0x0265000b"
+#define THEAD_clean_A0	".long 0x0245000b"
+#define THEAD_flush_A0	".long 0x0275000b"
+#define THEAD_SYNC_S	".long 0x0190000b"
+
 #define ALT_CMO_OP(_op, _start, _size, _cachesize)			\
-asm volatile(ALTERNATIVE(						\
-	__nops(5),							\
+asm volatile(ALTERNATIVE_2(						\
+	__nops(6),							\
 	"mv a0, %1\n\t"							\
 	"j 2f\n\t"							\
 	"3:\n\t"							\
 	"cbo." __stringify(_op) " (a0)\n\t"				\
 	"add a0, a0, %0\n\t"						\
 	"2:\n\t"							\
-	"bltu a0, %2, 3b\n\t", 0,					\
-		CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM)		\
+	"bltu a0, %2, 3b\n\t"						\
+	"nop", 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM,		\
+	"mv a0, %1\n\t"							\
+	"j 2f\n\t"							\
+	"3:\n\t"							\
+	THEAD_##_op##_A0 "\n\t"						\
+	"add a0, a0, %0\n\t"						\
+	"2:\n\t"							\
+	"bltu a0, %2, 3b\n\t"						\
+	THEAD_SYNC_S, THEAD_VENDOR_ID,					\
+			ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO)	\
 	: : "r"(_cachesize),						\
 	    "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)),	\
 	    "r"((unsigned long)(_start) + (_size))			\