
[2/2] MIPS: Loongson, workaround ll/sc weak ordering

Message ID 20190105150037.30261-2-syq@debian.org
State Superseded
Series [1/2] MIPS: Loongson, add sync before target of branch between llsc

Commit Message

YunQiang Su Jan. 5, 2019, 3 p.m. UTC
From: YunQiang Su <ysu@wavecomp.com>

On the Loongson-2G/2H/3A/3B there is a hardware flaw: ll/sc and
lld/scd are very weakly ordered. We need to add sync instructions
before each ll/lld and after the last sc/scd to work around it.
Otherwise, this flaw occasionally causes deadlocks (e.g. when running
heavy load tests with LTP).
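
For illustration, a minimal sketch of the patched pattern (a
hypothetical atomic read-modify-write loop; the labels and registers
are illustrative, not taken from this patch):

	sync			# inserted before the ll
1:	ll	t0, 0(a0)	# load-linked
	addu	t1, t0, a1
	sc	t1, 0(a0)	# store-conditional, t1 = success flag
	beqz	t1, 1b		# retry if the reservation was lost
	 nop			# branch delay slot
	sync			# inserted after the last sc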

We introduced a gcc/as option, "-mfix-loongson3-llsc"; this option
inserts a sync before each ll, so some addresses in __ex_table need
to be shifted.
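
Concretely (a hypothetical fixup entry, mirroring the LL_SHIFT_UA
macro in the futex.h hunk below): without the option, an __ex_table
entry points straight at the ll; with it, the assembler places a sync
at the label and the ll four bytes later, so the table has to cover
both addresses:

	/* without -mfix-loongson3-llsc */
	__UA_ADDR "\t1b, 4b\n"		/* 1b is the ll itself */

	/* with -mfix-loongson3-llsc */
	__UA_ADDR "\t(1b+0), 4b\n"	/* 1b is now the inserted sync */
	__UA_ADDR "\t(1b+4), 4b\n"	/* the ll moved four bytes down */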

Not all Loongson CPUs have this problem; Loongson has begun fixing it
in newer models, such as the latest series of the Loongson 3A 3000.
So for the kernel we introduce a config option,
CPU_LOONGSON3_WORKAROUND_LLSC; with it enabled, we add
"-mfix-loongson3-llsc" to the compiler flags.

This is based on a patch from Huacai Chen.

Signed-off-by: YunQiang Su <ysu@wavecomp.com>
---
 arch/mips/Kconfig             | 19 +++++++++++++++++++
 arch/mips/Makefile            |  5 +++++
 arch/mips/include/asm/futex.h | 20 ++++++++++++--------
 arch/mips/mm/tlbex.c          |  3 +++
 4 files changed, 39 insertions(+), 8 deletions(-)

Patch

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 787290781..4660e7847 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1385,6 +1385,25 @@  config CPU_LOONGSON3
 		The Loongson 3 processor implements the MIPS64R2 instruction
 		set with many extensions.
 
+config CPU_LOONGSON3_WORKAROUND_LLSC
+	bool "Work around the LL/SC weak ordering"
+	default n
+	depends on CPU_LOONGSON3
+	help
+	  On the Loongson-2G/2H/3A/3B there is a hardware flaw: ll/sc and
+	  lld/scd are very weakly ordered. We need to add sync instructions
+	  before each ll/lld and after the last sc/scd to work around it.
+	  Otherwise, this flaw occasionally causes deadlocks (e.g. when
+	  running heavy load tests with LTP).
+
+	  We introduced a gcc/as option, "-mfix-loongson3-llsc"; this option
+	  inserts a sync before each ll, so some addresses in __ex_table
+	  need to be shifted.
+
+	  Newer models have solved this problem, such as the latest series
+	  of the 3A 3000 (but not all 3A 3000). If you want to enable this
+	  workaround for older Loongson CPUs, say 'Y' here.
+
 config LOONGSON3_ENHANCEMENT
 	bool "New Loongson 3 CPU Enhancements"
 	default n
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 5b174c3d0..c2afaf58b 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -194,6 +194,11 @@  cflags-$(CONFIG_CPU_CAVIUM_OCTEON) += -Wa,-march=octeon
 endif
 cflags-$(CONFIG_CAVIUM_CN63XXP1) += -Wa,-mfix-cn63xxp1
 cflags-$(CONFIG_CPU_BMIPS)	+= -march=mips32 -Wa,-mips32 -Wa,--trap
+ifeq ($(CONFIG_CPU_LOONGSON3_WORKAROUND_LLSC),y)
+cflags-y	+= -mfix-loongson3-llsc
+else
+cflags-y	+= $(call cc-option,-mno-fix-loongson3-llsc,)
+endif
 
 cflags-$(CONFIG_CPU_R4000_WORKAROUNDS)	+= $(call cc-option,-mfix-r4000,)
 cflags-$(CONFIG_CPU_R4400_WORKAROUNDS)	+= $(call cc-option,-mfix-r4400,)
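
A note on the hunk above: when the workaround is enabled, the flag is
added unconditionally, so a toolchain that lacks -mfix-loongson3-llsc
fails the build loudly rather than silently producing an unfixed
kernel; when it is disabled, -mno-fix-loongson3-llsc goes through
cc-option, which drops the flag on compilers that do not know it.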
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
index 8eff134b3..c0608697f 100644
--- a/arch/mips/include/asm/futex.h
+++ b/arch/mips/include/asm/futex.h
@@ -18,6 +18,14 @@ 
 #include <asm/errno.h>
 #include <asm/war.h>
 
+#if defined(__mips_fix_loongson3_llsc) && defined(CONFIG_CPU_LOONGSON3_WORKAROUND_LLSC)
+# define LL_SHIFT_UA __UA_ADDR "\t(1b+0), 4b		\n" 	\
+		__UA_ADDR "\t(1b+4), 4b			\n"	\
+		__UA_ADDR "\t(2b+0), 4b			\n"
+#else
+# define LL_SHIFT_UA __UA_ADDR "\t1b, 4b		\n" 	\
+		__UA_ADDR "\t2b, 4b			\n"
+#endif
 #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)		\
 {									\
 	if (cpu_has_llsc && R10000_LLSC_WAR) {				\
@@ -41,8 +49,7 @@ 
 		"	j	3b				\n"	\
 		"	.previous				\n"	\
 		"	.section __ex_table,\"a\"		\n"	\
-		"	"__UA_ADDR "\t1b, 4b			\n"	\
-		"	"__UA_ADDR "\t2b, 4b			\n"	\
+		LL_SHIFT_UA						\
 		"	.previous				\n"	\
 		: "=r" (ret), "=&r" (oldval),				\
 		  "=" GCC_OFF_SMALL_ASM() (*uaddr)				\
@@ -70,8 +77,7 @@ 
 		"	j	3b				\n"	\
 		"	.previous				\n"	\
 		"	.section __ex_table,\"a\"		\n"	\
-		"	"__UA_ADDR "\t1b, 4b			\n"	\
-		"	"__UA_ADDR "\t2b, 4b			\n"	\
+		LL_SHIFT_UA						\
 		"	.previous				\n"	\
 		: "=r" (ret), "=&r" (oldval),				\
 		  "=" GCC_OFF_SMALL_ASM() (*uaddr)				\
@@ -155,8 +161,7 @@  futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		"	j	3b					\n"
 		"	.previous					\n"
 		"	.section __ex_table,\"a\"			\n"
-		"	"__UA_ADDR "\t1b, 4b				\n"
-		"	"__UA_ADDR "\t2b, 4b				\n"
+		LL_SHIFT_UA
 		"	.previous					\n"
 		: "+r" (ret), "=&r" (val), "=" GCC_OFF_SMALL_ASM() (*uaddr)
 		: GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
@@ -185,8 +190,7 @@  futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		"	j	3b					\n"
 		"	.previous					\n"
 		"	.section __ex_table,\"a\"			\n"
-		"	"__UA_ADDR "\t1b, 4b				\n"
-		"	"__UA_ADDR "\t2b, 4b				\n"
+		LL_SHIFT_UA
 		"	.previous					\n"
 		: "+r" (ret), "=&r" (val), "=" GCC_OFF_SMALL_ASM() (*uaddr)
 		: GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 08a9a66ef..e9eb4715c 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -1649,6 +1649,9 @@  static void
 iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr)
 {
 #ifdef CONFIG_SMP
+# ifdef CONFIG_CPU_LOONGSON3_WORKAROUND_LLSC
+	uasm_i_sync(p, 0);
+# endif
 # ifdef CONFIG_PHYS_ADDR_T_64BIT
 	if (cpu_has_64bits)
 		uasm_i_lld(p, pte, 0, ptr);
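
With the workaround enabled, the load that iPTE_LW() emits into the
TLB handler is then preceded by a sync, roughly (a sketch of the SMP,
64-bit physical address case; the operand names follow the C code
above, not real register allocation):

	sync			# from uasm_i_sync(p, 0)
	lld	pte, 0(ptr)	# from uasm_i_lld(p, pte, 0, ptr)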