diff mbox series

[2/2] riscv: qspinlock: errata: Add ERRATA_THEAD_WRITE_ONCE fixup

Message ID 20241214143039.4139398-3-guoren@kernel.org (mailing list archive)
State New
Headers show
Series riscv: qspinlock: errata: Add ERRATA_THEAD_WRITE_ONCE fixup | expand

Checks

Context Check Description
conchuod/vmtest-for-next-PR success PR summary
conchuod/patch-2-test-1 success .github/scripts/patches/tests/build_rv32_defconfig.sh took 104.77s
conchuod/patch-2-test-2 success .github/scripts/patches/tests/build_rv64_clang_allmodconfig.sh took 1999.99s
conchuod/patch-2-test-3 success .github/scripts/patches/tests/build_rv64_gcc_allmodconfig.sh took 2348.70s
conchuod/patch-2-test-4 success .github/scripts/patches/tests/build_rv64_nommu_k210_defconfig.sh took 15.87s
conchuod/patch-2-test-5 success .github/scripts/patches/tests/build_rv64_nommu_virt_defconfig.sh took 17.62s
conchuod/patch-2-test-6 warning .github/scripts/patches/tests/checkpatch.sh took 1.21s
conchuod/patch-2-test-7 success .github/scripts/patches/tests/dtb_warn_rv64.sh took 35.60s
conchuod/patch-2-test-8 success .github/scripts/patches/tests/header_inline.sh took 0.00s
conchuod/patch-2-test-9 success .github/scripts/patches/tests/kdoc.sh took 0.53s
conchuod/patch-2-test-10 success .github/scripts/patches/tests/module_param.sh took 0.01s
conchuod/patch-2-test-11 success .github/scripts/patches/tests/verify_fixes.sh took 0.00s
conchuod/patch-2-test-12 success .github/scripts/patches/tests/verify_signedoff.sh took 0.02s

Commit Message

Guo Ren Dec. 14, 2024, 2:30 p.m. UTC
From: Guo Ren <guoren@linux.alibaba.com>

The early version of T-Head C9xx cores has a store merge buffer
delay problem. The store merge buffer could improve the store queue
performance by merging multi-store requests, but when there are not
continued store requests, the prior single store request would be
waiting in the store queue for a long time. That would cause
significant problems for communication between multi-cores. This
problem was found on sg2042 & th1520 platforms with the qspinlock
lock torture test.

So appending a fence w.o could immediately flush the store merge
buffer and let other cores see the write result.

This will apply the WRITE_ONCE errata to handle the non-standard
behavior via appending a fence w.o instruction for WRITE_ONCE().

This problem is only observed on the sg2042 hardware platform by
running the lock_torture test program for half an hour. The problem
was not found in the user space application, because interrupt can
break the livelock.

Reviewed-by: Leonardo Bras <leobras@redhat.com>
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
---
 arch/riscv/Kconfig.errata              | 19 +++++++++++++++
 arch/riscv/errata/thead/errata.c       | 20 ++++++++++++++++
 arch/riscv/include/asm/rwonce.h        | 33 ++++++++++++++++++++++++++
 arch/riscv/include/asm/vendorid_list.h |  3 ++-
 include/asm-generic/rwonce.h           |  2 ++
 5 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/include/asm/rwonce.h
diff mbox series

Patch

diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
index 2acc7d876e1f..fd6ba85b7a72 100644
--- a/arch/riscv/Kconfig.errata
+++ b/arch/riscv/Kconfig.errata
@@ -119,4 +119,23 @@  config ERRATA_THEAD_PMU
 
 	  If you don't know what to do here, say "Y".
 
+config ERRATA_THEAD_WRITE_ONCE
+	bool "Apply T-Head WRITE_ONCE errata"
+	depends on ERRATA_THEAD
+	default y
+	help
+	  The early version of T-Head C9xx cores of sg2042 & th1520 have a store
+	  merge buffer delay problem. The store merge buffer could improve the
+	  store queue performance by merging multi-store requests, but when there
+	  are no continued store requests, the prior single store request would be
+	  waiting in the store queue for a long time. That would cause signifi-
+	  cant problems for communication between multi-cores. Appending a
+	  fence w.o could immediately flush the store merge buffer and let other
+	  cores see the write result.
+
+	  This will apply the WRITE_ONCE errata to handle the non-standard beh-
+	  avior via appending a fence w.o instruction for WRITE_ONCE().
+
+	  If you don't know what to do here, say "Y".
+
 endmenu # "CPU errata selection"
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
index e24770a77932..dd41f0221fe9 100644
--- a/arch/riscv/errata/thead/errata.c
+++ b/arch/riscv/errata/thead/errata.c
@@ -142,6 +142,23 @@  static bool errata_probe_pmu(unsigned int stage,
 	return true;
 }
 
+static bool errata_probe_write_once(unsigned int stage,
+				    unsigned long arch_id, unsigned long impid)
+{
+	if (!IS_ENABLED(CONFIG_ERRATA_THEAD_WRITE_ONCE))
+		return false;
+
+	/* target-c9xx cores report arch_id and impid as 0 */
+	if (arch_id != 0 || impid != 0)
+		return false;
+
+	if (stage == RISCV_ALTERNATIVES_BOOT ||
+	    stage == RISCV_ALTERNATIVES_MODULE)
+		return true;
+
+	return false;
+}
+
 static u32 thead_errata_probe(unsigned int stage,
 			      unsigned long archid, unsigned long impid)
 {
@@ -155,6 +172,9 @@  static u32 thead_errata_probe(unsigned int stage,
 	if (errata_probe_pmu(stage, archid, impid))
 		cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
 
+	if (errata_probe_write_once(stage, archid, impid))
+		cpu_req_errata |= BIT(ERRATA_THEAD_WRITE_ONCE);
+
 	return cpu_req_errata;
 }
 
diff --git a/arch/riscv/include/asm/rwonce.h b/arch/riscv/include/asm/rwonce.h
new file mode 100644
index 000000000000..756586746a4f
--- /dev/null
+++ b/arch/riscv/include/asm/rwonce.h
@@ -0,0 +1,33 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_RWONCE_H
+#define __ASM_RWONCE_H
+
+#include <linux/compiler_types.h>
+#include <asm/alternative-macros.h>
+#include <asm/vendorid_list.h>
+
+#if defined(CONFIG_ERRATA_THEAD_WRITE_ONCE) && !defined(NO_ALTERNATIVE)
+
+#define write_once_fence()				\
+do {							\
+	asm volatile(ALTERNATIVE(			\
+		"nop",					\
+		"fence w, o",				\
+		THEAD_VENDOR_ID,			\
+		ERRATA_THEAD_WRITE_ONCE,		\
+		CONFIG_ERRATA_THEAD_WRITE_ONCE)		\
+		: : : "memory");			\
+} while (0)
+
+#define __WRITE_ONCE(x, val)				\
+do {							\
+	*(volatile typeof(x) *)&(x) = (val);		\
+	write_once_fence();				\
+} while (0)
+
+#endif /* defined(CONFIG_ERRATA_THEAD_WRITE_ONCE) && !defined(NO_ALTERNATIVE) */
+
+#include <asm-generic/rwonce.h>
+
+#endif	/* __ASM_RWONCE_H */
diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h
index 7a387368633a..d5a6e3963feb 100644
--- a/arch/riscv/include/asm/vendorid_list.h
+++ b/arch/riscv/include/asm/vendorid_list.h
@@ -23,7 +23,8 @@ 
 #ifdef CONFIG_ERRATA_THEAD
 #define	ERRATA_THEAD_MAE 0
 #define	ERRATA_THEAD_PMU 1
-#define	ERRATA_THEAD_NUMBER 2
+#define	ERRATA_THEAD_WRITE_ONCE 2
+#define	ERRATA_THEAD_NUMBER 3
 #endif
 
 #endif
diff --git a/include/asm-generic/rwonce.h b/include/asm-generic/rwonce.h
index 8d0a6280e982..fb07fe8c6e45 100644
--- a/include/asm-generic/rwonce.h
+++ b/include/asm-generic/rwonce.h
@@ -50,10 +50,12 @@ 
 	__READ_ONCE(x);							\
 })
 
+#ifndef __WRITE_ONCE
 #define __WRITE_ONCE(x, val)						\
 do {									\
 	*(volatile typeof(x) *)&(x) = (val);				\
 } while (0)
+#endif
 
 #define WRITE_ONCE(x, val)						\
 do {									\