Message ID | 20230910082911.3378782-10-guoren@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | riscv: Add Native/Paravirt qspinlock support | expand |
On Sun, Sep 10, 2023 at 04:29:03AM -0400, guoren@kernel.org wrote: > From: Guo Ren <guoren@linux.alibaba.com> > > The early version of T-Head C9xx cores has a store merge buffer > delay problem. The store merge buffer could improve the store queue > performance by merging multi-store requests, but when there are not > continued store requests, the prior single store request would be > waiting in the store queue for a long time. That would cause > significant problems for communication between multi-cores. This > problem was found on sg2042 & th1520 platforms with the qspinlock > lock torture test. > > So appending a fence w.o could immediately flush the store merge > buffer and let other cores see the write result. > > This will apply the WRITE_ONCE errata to handle the non-standard > behavior via appending a fence w.o instruction for WRITE_ONCE(). > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > Signed-off-by: Guo Ren <guoren@kernel.org> > --- > arch/riscv/Kconfig.errata | 19 +++++++++++++++++++ > arch/riscv/errata/thead/errata.c | 20 ++++++++++++++++++++ > arch/riscv/include/asm/errata_list.h | 13 ------------- > arch/riscv/include/asm/rwonce.h | 24 ++++++++++++++++++++++++ > arch/riscv/include/asm/vendorid_list.h | 14 ++++++++++++++ > include/asm-generic/rwonce.h | 2 ++ > 6 files changed, 79 insertions(+), 13 deletions(-) > create mode 100644 arch/riscv/include/asm/rwonce.h > > diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata > index 1aa85a427ff3..c919cc3f1a3a 100644 > --- a/arch/riscv/Kconfig.errata > +++ b/arch/riscv/Kconfig.errata > @@ -77,4 +77,23 @@ config ERRATA_THEAD_PMU > > If you don't know what to do here, say "Y". > > +config ERRATA_THEAD_WRITE_ONCE > + bool "Apply T-Head WRITE_ONCE errata" > + depends on ERRATA_THEAD > + default y > + help > + The early version of T-Head C9xx cores has a store merge buffer > + delay problem. The store merge buffer could improve the store queue > + performance by merging multi-store requests, but when there are no > + continued store requests, the prior single store request would be > + waiting in the store queue for a long time. That would cause > + significant problems for communication between multi-cores. Appending > + a fence w.o could immediately flush the store merge buffer and let > + other cores see the write result. > + > + This will apply the WRITE_ONCE errata to handle the non-standard > + behavior via appending a fence w.o instruction for WRITE_ONCE(). > + > + If you don't know what to do here, say "Y". > + > endmenu # "CPU errata selection" > diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c > index be84b14f0118..751eb5a7f614 100644 > --- a/arch/riscv/errata/thead/errata.c > +++ b/arch/riscv/errata/thead/errata.c > @@ -69,6 +69,23 @@ static bool errata_probe_pmu(unsigned int stage, > return true; > } > > +static bool errata_probe_write_once(unsigned int stage, > + unsigned long arch_id, unsigned long impid) > +{ > + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_WRITE_ONCE)) > + return false; > + > + /* target-c9xx cores report arch_id and impid as 0 */ > + if (arch_id != 0 || impid != 0) > + return false; > + > + if (stage == RISCV_ALTERNATIVES_BOOT || > + stage == RISCV_ALTERNATIVES_MODULE) > + return true; > + > + return false; > +} > + > static u32 thead_errata_probe(unsigned int stage, > unsigned long archid, unsigned long impid) > { > @@ -83,6 +100,9 @@ static u32 thead_errata_probe(unsigned int stage, > if (errata_probe_pmu(stage, archid, impid)) > cpu_req_errata |= BIT(ERRATA_THEAD_PMU); > > + if (errata_probe_write_once(stage, archid, impid)) > + cpu_req_errata |= BIT(ERRATA_THEAD_WRITE_ONCE); > + > return cpu_req_errata; > } > > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h > index 712cab7adffe..fbb2b8d39321 100644 > --- a/arch/riscv/include/asm/errata_list.h > +++ b/arch/riscv/include/asm/errata_list.h > @@ -11,19 +11,6 @@ > #include <asm/hwcap.h> > #include <asm/vendorid_list.h> > > -#ifdef CONFIG_ERRATA_SIFIVE > -#define ERRATA_SIFIVE_CIP_453 0 > -#define ERRATA_SIFIVE_CIP_1200 1 > -#define ERRATA_SIFIVE_NUMBER 2 > -#endif > - > -#ifdef CONFIG_ERRATA_THEAD > -#define ERRATA_THEAD_PBMT 0 > -#define ERRATA_THEAD_CMO 1 > -#define ERRATA_THEAD_PMU 2 > -#define ERRATA_THEAD_NUMBER 3 > -#endif > - Here I understand you are moving stuff from errata_list.h to vendorid_list.h. Wouldn't it be better to do this on a separated patch before this one? I understand this is used here, but it looks like it's unrelated. > #ifdef __ASSEMBLY__ > > #define ALT_INSN_FAULT(x) \ > diff --git a/arch/riscv/include/asm/rwonce.h b/arch/riscv/include/asm/rwonce.h > new file mode 100644 > index 000000000000..be0b8864969d > --- /dev/null > +++ b/arch/riscv/include/asm/rwonce.h > @@ -0,0 +1,24 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > + > +#ifndef __ASM_RWONCE_H > +#define __ASM_RWONCE_H > + > +#include <linux/compiler_types.h> > +#include <asm/alternative-macros.h> > +#include <asm/vendorid_list.h> > + > +#define __WRITE_ONCE(x, val) \ > +do { \ > + *(volatile typeof(x) *)&(x) = (val); \ > + asm volatile(ALTERNATIVE( \ > + __nops(1), \ > + "fence w, o\n\t", \ > + THEAD_VENDOR_ID, \ > + ERRATA_THEAD_WRITE_ONCE, \ > + CONFIG_ERRATA_THEAD_WRITE_ONCE) \ > + : : : "memory"); \ > +} while (0) > + > +#include <asm-generic/rwonce.h> > + > +#endif /* __ASM_RWONCE_H */ IIUC the idea here is to have an alternative __WRITE_ONCE that replaces the asm-generic one. Honestly, this asm alternative here seems too much information, and too cryptic. I mean, yeah in the patch it all makes sense, but I imagine myself in the future looking at all this and trying to understand what is going on. Wouldn't it look better to have something like: ##### /* Some explanation like the one on Kconfig */ #define write_once_flush() \ do { \ asm volatile(ALTERNATIVE( \ __nops(1), \ "fence w, o\n\t", \ THEAD_VENDOR_ID, \ ERRATA_THEAD_WRITE_ONCE, \ CONFIG_ERRATA_THEAD_WRITE_ONCE) \ : : : "memory"); \ } while(0) #define __WRITE_ONCE(x, val) \ do { \ *(volatile typeof(x) *)&(x) = (val); \ write_once_flush(); \ } while(0) ##### This way I could quickly see there is a flush after the writting of WRITE_ONCE(), and this flush is the above "complicated" asm. What do you think? > diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h > index cb89af3f0704..73078cfe4029 100644 > --- a/arch/riscv/include/asm/vendorid_list.h > +++ b/arch/riscv/include/asm/vendorid_list.h > @@ -8,4 +8,18 @@ > #define SIFIVE_VENDOR_ID 0x489 > #define THEAD_VENDOR_ID 0x5b7 > > +#ifdef CONFIG_ERRATA_SIFIVE > +#define ERRATA_SIFIVE_CIP_453 0 > +#define ERRATA_SIFIVE_CIP_1200 1 > +#define ERRATA_SIFIVE_NUMBER 2 > +#endif > + > +#ifdef CONFIG_ERRATA_THEAD > +#define ERRATA_THEAD_PBMT 0 > +#define ERRATA_THEAD_CMO 1 > +#define ERRATA_THEAD_PMU 2 > +#define ERRATA_THEAD_WRITE_ONCE 3 > +#define ERRATA_THEAD_NUMBER 4 > +#endif > + > #endif > diff --git a/include/asm-generic/rwonce.h b/include/asm-generic/rwonce.h > index 8d0a6280e982..fb07fe8c6e45 100644 > --- a/include/asm-generic/rwonce.h > +++ b/include/asm-generic/rwonce.h > @@ -50,10 +50,12 @@ > __READ_ONCE(x); \ > }) > > +#ifndef __WRITE_ONCE > #define __WRITE_ONCE(x, val) \ > do { \ > *(volatile typeof(x) *)&(x) = (val); \ > } while (0) > +#endif > > #define WRITE_ONCE(x, val) \ > do { \ > -- > 2.36.1 >
On Thu, Sep 14, 2023 at 4:32 PM Leonardo Bras <leobras@redhat.com> wrote: > > On Sun, Sep 10, 2023 at 04:29:03AM -0400, guoren@kernel.org wrote: > > From: Guo Ren <guoren@linux.alibaba.com> > > > > The early version of T-Head C9xx cores has a store merge buffer > > delay problem. The store merge buffer could improve the store queue > > performance by merging multi-store requests, but when there are not > > continued store requests, the prior single store request would be > > waiting in the store queue for a long time. That would cause > > significant problems for communication between multi-cores. This > > problem was found on sg2042 & th1520 platforms with the qspinlock > > lock torture test. > > > > So appending a fence w.o could immediately flush the store merge > > buffer and let other cores see the write result. > > > > This will apply the WRITE_ONCE errata to handle the non-standard > > behavior via appending a fence w.o instruction for WRITE_ONCE(). > > > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > > Signed-off-by: Guo Ren <guoren@kernel.org> > > --- > > arch/riscv/Kconfig.errata | 19 +++++++++++++++++++ > > arch/riscv/errata/thead/errata.c | 20 ++++++++++++++++++++ > > arch/riscv/include/asm/errata_list.h | 13 ------------- > > arch/riscv/include/asm/rwonce.h | 24 ++++++++++++++++++++++++ > > arch/riscv/include/asm/vendorid_list.h | 14 ++++++++++++++ > > include/asm-generic/rwonce.h | 2 ++ > > 6 files changed, 79 insertions(+), 13 deletions(-) > > create mode 100644 arch/riscv/include/asm/rwonce.h > > > > diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata > > index 1aa85a427ff3..c919cc3f1a3a 100644 > > --- a/arch/riscv/Kconfig.errata > > +++ b/arch/riscv/Kconfig.errata > > @@ -77,4 +77,23 @@ config ERRATA_THEAD_PMU > > > > If you don't know what to do here, say "Y". > > > > +config ERRATA_THEAD_WRITE_ONCE > > + bool "Apply T-Head WRITE_ONCE errata" > > + depends on ERRATA_THEAD > > + default y > > + help > > + The early version of T-Head C9xx cores has a store merge buffer > > + delay problem. The store merge buffer could improve the store queue > > + performance by merging multi-store requests, but when there are no > > + continued store requests, the prior single store request would be > > + waiting in the store queue for a long time. That would cause > > + significant problems for communication between multi-cores. Appending > > + a fence w.o could immediately flush the store merge buffer and let > > + other cores see the write result. > > + > > + This will apply the WRITE_ONCE errata to handle the non-standard > > + behavior via appending a fence w.o instruction for WRITE_ONCE(). > > + > > + If you don't know what to do here, say "Y". > > + > > endmenu # "CPU errata selection" > > diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c > > index be84b14f0118..751eb5a7f614 100644 > > --- a/arch/riscv/errata/thead/errata.c > > +++ b/arch/riscv/errata/thead/errata.c > > @@ -69,6 +69,23 @@ static bool errata_probe_pmu(unsigned int stage, > > return true; > > } > > > > +static bool errata_probe_write_once(unsigned int stage, > > + unsigned long arch_id, unsigned long impid) > > +{ > > + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_WRITE_ONCE)) > > + return false; > > + > > + /* target-c9xx cores report arch_id and impid as 0 */ > > + if (arch_id != 0 || impid != 0) > > + return false; > > + > > + if (stage == RISCV_ALTERNATIVES_BOOT || > > + stage == RISCV_ALTERNATIVES_MODULE) > > + return true; > > + > > + return false; > > +} > > + > > static u32 thead_errata_probe(unsigned int stage, > > unsigned long archid, unsigned long impid) > > { > > @@ -83,6 +100,9 @@ static u32 thead_errata_probe(unsigned int stage, > > if (errata_probe_pmu(stage, archid, impid)) > > cpu_req_errata |= BIT(ERRATA_THEAD_PMU); > > > > + if (errata_probe_write_once(stage, archid, impid)) > > + cpu_req_errata |= BIT(ERRATA_THEAD_WRITE_ONCE); > > + > > return cpu_req_errata; > > } > > > > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h > > index 712cab7adffe..fbb2b8d39321 100644 > > --- a/arch/riscv/include/asm/errata_list.h > > +++ b/arch/riscv/include/asm/errata_list.h > > @@ -11,19 +11,6 @@ > > #include <asm/hwcap.h> > > #include <asm/vendorid_list.h> > > > > -#ifdef CONFIG_ERRATA_SIFIVE > > -#define ERRATA_SIFIVE_CIP_453 0 > > -#define ERRATA_SIFIVE_CIP_1200 1 > > -#define ERRATA_SIFIVE_NUMBER 2 > > -#endif > > - > > -#ifdef CONFIG_ERRATA_THEAD > > -#define ERRATA_THEAD_PBMT 0 > > -#define ERRATA_THEAD_CMO 1 > > -#define ERRATA_THEAD_PMU 2 > > -#define ERRATA_THEAD_NUMBER 3 > > -#endif > > - > > Here I understand you are moving stuff from errata_list.h to > vendorid_list.h. Wouldn't it be better to do this on a separated patch > before this one? Okay. > > I understand this is used here, but it looks like it's unrelated. > > > #ifdef __ASSEMBLY__ > > > > #define ALT_INSN_FAULT(x) \ > > diff --git a/arch/riscv/include/asm/rwonce.h b/arch/riscv/include/asm/rwonce.h > > new file mode 100644 > > index 000000000000..be0b8864969d > > --- /dev/null > > +++ b/arch/riscv/include/asm/rwonce.h > > @@ -0,0 +1,24 @@ > > +/* SPDX-License-Identifier: GPL-2.0 */ > > + > > +#ifndef __ASM_RWONCE_H > > +#define __ASM_RWONCE_H > > + > > +#include <linux/compiler_types.h> > > +#include <asm/alternative-macros.h> > > +#include <asm/vendorid_list.h> > > + > > +#define __WRITE_ONCE(x, val) \ > > +do { \ > > + *(volatile typeof(x) *)&(x) = (val); \ > > + asm volatile(ALTERNATIVE( \ > > + __nops(1), \ > > + "fence w, o\n\t", \ > > + THEAD_VENDOR_ID, \ > > + ERRATA_THEAD_WRITE_ONCE, \ > > + CONFIG_ERRATA_THEAD_WRITE_ONCE) \ > > + : : : "memory"); \ > > +} while (0) > > + > > +#include <asm-generic/rwonce.h> > > + > > +#endif /* __ASM_RWONCE_H */ > > IIUC the idea here is to have an alternative __WRITE_ONCE that replaces the > asm-generic one. > > Honestly, this asm alternative here seems too much information, and too > cryptic. I mean, yeah in the patch it all makes sense, but I imagine myself > in the future looking at all this and trying to understand what is going > on. > > Wouldn't it look better to have something like: > > ##### > > /* Some explanation like the one on Kconfig */ > > #define write_once_flush() \ > do { \ > asm volatile(ALTERNATIVE( \ > __nops(1), \ > "fence w, o\n\t", \ > THEAD_VENDOR_ID, \ > ERRATA_THEAD_WRITE_ONCE, \ > CONFIG_ERRATA_THEAD_WRITE_ONCE) \ > : : : "memory"); \ > } while(0) > > > #define __WRITE_ONCE(x, val) \ > do { \ > *(volatile typeof(x) *)&(x) = (val); \ > write_once_flush(); \ > } while(0) > > ##### > > > This way I could quickly see there is a flush after the writting of > WRITE_ONCE(), and this flush is the above "complicated" asm. > > What do you think? Okay, good point, and I would take it. > > > diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h > > index cb89af3f0704..73078cfe4029 100644 > > --- a/arch/riscv/include/asm/vendorid_list.h > > +++ b/arch/riscv/include/asm/vendorid_list.h > > @@ -8,4 +8,18 @@ > > #define SIFIVE_VENDOR_ID 0x489 > > #define THEAD_VENDOR_ID 0x5b7 > > > > +#ifdef CONFIG_ERRATA_SIFIVE > > +#define ERRATA_SIFIVE_CIP_453 0 > > +#define ERRATA_SIFIVE_CIP_1200 1 > > +#define ERRATA_SIFIVE_NUMBER 2 > > +#endif > > + > > +#ifdef CONFIG_ERRATA_THEAD > > +#define ERRATA_THEAD_PBMT 0 > > +#define ERRATA_THEAD_CMO 1 > > +#define ERRATA_THEAD_PMU 2 > > +#define ERRATA_THEAD_WRITE_ONCE 3 > > +#define ERRATA_THEAD_NUMBER 4 > > +#endif > > + > > #endif > > diff --git a/include/asm-generic/rwonce.h b/include/asm-generic/rwonce.h > > index 8d0a6280e982..fb07fe8c6e45 100644 > > --- a/include/asm-generic/rwonce.h > > +++ b/include/asm-generic/rwonce.h > > @@ -50,10 +50,12 @@ > > __READ_ONCE(x); \ > > }) > > > > +#ifndef __WRITE_ONCE > > #define __WRITE_ONCE(x, val) \ > > do { \ > > *(volatile typeof(x) *)&(x) = (val); \ > > } while (0) > > +#endif > > > > #define WRITE_ONCE(x, val) \ > > do { \ > > -- > > 2.36.1 > > >
On Sun, Sep 17, 2023 at 11:15:51PM +0800, Guo Ren wrote: > On Thu, Sep 14, 2023 at 4:32 PM Leonardo Bras <leobras@redhat.com> wrote: > > > > On Sun, Sep 10, 2023 at 04:29:03AM -0400, guoren@kernel.org wrote: > > > From: Guo Ren <guoren@linux.alibaba.com> > > > > > > The early version of T-Head C9xx cores has a store merge buffer > > > delay problem. The store merge buffer could improve the store queue > > > performance by merging multi-store requests, but when there are not > > > continued store requests, the prior single store request would be > > > waiting in the store queue for a long time. That would cause > > > significant problems for communication between multi-cores. This > > > problem was found on sg2042 & th1520 platforms with the qspinlock > > > lock torture test. > > > > > > So appending a fence w.o could immediately flush the store merge > > > buffer and let other cores see the write result. > > > > > > This will apply the WRITE_ONCE errata to handle the non-standard > > > behavior via appending a fence w.o instruction for WRITE_ONCE(). > > > > > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > > > Signed-off-by: Guo Ren <guoren@kernel.org> > > > --- > > > arch/riscv/Kconfig.errata | 19 +++++++++++++++++++ > > > arch/riscv/errata/thead/errata.c | 20 ++++++++++++++++++++ > > > arch/riscv/include/asm/errata_list.h | 13 ------------- > > > arch/riscv/include/asm/rwonce.h | 24 ++++++++++++++++++++++++ > > > arch/riscv/include/asm/vendorid_list.h | 14 ++++++++++++++ > > > include/asm-generic/rwonce.h | 2 ++ > > > 6 files changed, 79 insertions(+), 13 deletions(-) > > > create mode 100644 arch/riscv/include/asm/rwonce.h > > > > > > diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata > > > index 1aa85a427ff3..c919cc3f1a3a 100644 > > > --- a/arch/riscv/Kconfig.errata > > > +++ b/arch/riscv/Kconfig.errata > > > @@ -77,4 +77,23 @@ config ERRATA_THEAD_PMU > > > > > > If you don't know what to do here, say "Y". > > > > > > +config ERRATA_THEAD_WRITE_ONCE > > > + bool "Apply T-Head WRITE_ONCE errata" > > > + depends on ERRATA_THEAD > > > + default y > > > + help > > > + The early version of T-Head C9xx cores has a store merge buffer > > > + delay problem. The store merge buffer could improve the store queue > > > + performance by merging multi-store requests, but when there are no > > > + continued store requests, the prior single store request would be > > > + waiting in the store queue for a long time. That would cause > > > + significant problems for communication between multi-cores. Appending > > > + a fence w.o could immediately flush the store merge buffer and let > > > + other cores see the write result. > > > + > > > + This will apply the WRITE_ONCE errata to handle the non-standard > > > + behavior via appending a fence w.o instruction for WRITE_ONCE(). > > > + > > > + If you don't know what to do here, say "Y". > > > + > > > endmenu # "CPU errata selection" > > > diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c > > > index be84b14f0118..751eb5a7f614 100644 > > > --- a/arch/riscv/errata/thead/errata.c > > > +++ b/arch/riscv/errata/thead/errata.c > > > @@ -69,6 +69,23 @@ static bool errata_probe_pmu(unsigned int stage, > > > return true; > > > } > > > > > > +static bool errata_probe_write_once(unsigned int stage, > > > + unsigned long arch_id, unsigned long impid) > > > +{ > > > + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_WRITE_ONCE)) > > > + return false; > > > + > > > + /* target-c9xx cores report arch_id and impid as 0 */ > > > + if (arch_id != 0 || impid != 0) > > > + return false; > > > + > > > + if (stage == RISCV_ALTERNATIVES_BOOT || > > > + stage == RISCV_ALTERNATIVES_MODULE) > > > + return true; > > > + > > > + return false; > > > +} > > > + > > > static u32 thead_errata_probe(unsigned int stage, > > > unsigned long archid, unsigned long impid) > > > { > > > @@ -83,6 +100,9 @@ static u32 thead_errata_probe(unsigned int stage, > > > if (errata_probe_pmu(stage, archid, impid)) > > > cpu_req_errata |= BIT(ERRATA_THEAD_PMU); > > > > > > + if (errata_probe_write_once(stage, archid, impid)) > > > + cpu_req_errata |= BIT(ERRATA_THEAD_WRITE_ONCE); > > > + > > > return cpu_req_errata; > > > } > > > > > > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h > > > index 712cab7adffe..fbb2b8d39321 100644 > > > --- a/arch/riscv/include/asm/errata_list.h > > > +++ b/arch/riscv/include/asm/errata_list.h > > > @@ -11,19 +11,6 @@ > > > #include <asm/hwcap.h> > > > #include <asm/vendorid_list.h> > > > > > > -#ifdef CONFIG_ERRATA_SIFIVE > > > -#define ERRATA_SIFIVE_CIP_453 0 > > > -#define ERRATA_SIFIVE_CIP_1200 1 > > > -#define ERRATA_SIFIVE_NUMBER 2 > > > -#endif > > > - > > > -#ifdef CONFIG_ERRATA_THEAD > > > -#define ERRATA_THEAD_PBMT 0 > > > -#define ERRATA_THEAD_CMO 1 > > > -#define ERRATA_THEAD_PMU 2 > > > -#define ERRATA_THEAD_NUMBER 3 > > > -#endif > > > - > > > > Here I understand you are moving stuff from errata_list.h to > > vendorid_list.h. Wouldn't it be better to do this on a separated patch > > before this one? > Okay. > > > > > I understand this is used here, but it looks like it's unrelated. > > > > > #ifdef __ASSEMBLY__ > > > > > > #define ALT_INSN_FAULT(x) \ > > > diff --git a/arch/riscv/include/asm/rwonce.h b/arch/riscv/include/asm/rwonce.h > > > new file mode 100644 > > > index 000000000000..be0b8864969d > > > --- /dev/null > > > +++ b/arch/riscv/include/asm/rwonce.h > > > @@ -0,0 +1,24 @@ > > > +/* SPDX-License-Identifier: GPL-2.0 */ > > > + > > > +#ifndef __ASM_RWONCE_H > > > +#define __ASM_RWONCE_H > > > + > > > +#include <linux/compiler_types.h> > > > +#include <asm/alternative-macros.h> > > > +#include <asm/vendorid_list.h> > > > + > > > +#define __WRITE_ONCE(x, val) \ > > > +do { \ > > > + *(volatile typeof(x) *)&(x) = (val); \ > > > + asm volatile(ALTERNATIVE( \ > > > + __nops(1), \ > > > + "fence w, o\n\t", \ > > > + THEAD_VENDOR_ID, \ > > > + ERRATA_THEAD_WRITE_ONCE, \ > > > + CONFIG_ERRATA_THEAD_WRITE_ONCE) \ > > > + : : : "memory"); \ > > > +} while (0) > > > + > > > +#include <asm-generic/rwonce.h> > > > + > > > +#endif /* __ASM_RWONCE_H */ > > > > IIUC the idea here is to have an alternative __WRITE_ONCE that replaces the > > asm-generic one. > > > > Honestly, this asm alternative here seems too much information, and too > > cryptic. I mean, yeah in the patch it all makes sense, but I imagine myself > > in the future looking at all this and trying to understand what is going > > on. > > > > Wouldn't it look better to have something like: > > > > ##### > > > > /* Some explanation like the one on Kconfig */ > > > > #define write_once_flush() \ > > do { \ > > asm volatile(ALTERNATIVE( \ > > __nops(1), \ > > "fence w, o\n\t", \ > > THEAD_VENDOR_ID, \ > > ERRATA_THEAD_WRITE_ONCE, \ > > CONFIG_ERRATA_THEAD_WRITE_ONCE) \ > > : : : "memory"); \ > > } while(0) > > > > > > #define __WRITE_ONCE(x, val) \ > > do { \ > > *(volatile typeof(x) *)&(x) = (val); \ > > write_once_flush(); \ > > } while(0) > > > > ##### > > > > > > This way I could quickly see there is a flush after the writting of > > WRITE_ONCE(), and this flush is the above "complicated" asm. > > > > What do you think? > Okay, good point, and I would take it. Thanks! Once you take the above suggestions, please include in your next patch: Reviewed-by: Leonardo Bras <leobras@redhat.com> > > > > > > diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h > > > index cb89af3f0704..73078cfe4029 100644 > > > --- a/arch/riscv/include/asm/vendorid_list.h > > > +++ b/arch/riscv/include/asm/vendorid_list.h > > > @@ -8,4 +8,18 @@ > > > #define SIFIVE_VENDOR_ID 0x489 > > > #define THEAD_VENDOR_ID 0x5b7 > > > > > > +#ifdef CONFIG_ERRATA_SIFIVE > > > +#define ERRATA_SIFIVE_CIP_453 0 > > > +#define ERRATA_SIFIVE_CIP_1200 1 > > > +#define ERRATA_SIFIVE_NUMBER 2 > > > +#endif > > > + > > > +#ifdef CONFIG_ERRATA_THEAD > > > +#define ERRATA_THEAD_PBMT 0 > > > +#define ERRATA_THEAD_CMO 1 > > > +#define ERRATA_THEAD_PMU 2 > > > +#define ERRATA_THEAD_WRITE_ONCE 3 > > > +#define ERRATA_THEAD_NUMBER 4 > > > +#endif > > > + > > > #endif > > > diff --git a/include/asm-generic/rwonce.h b/include/asm-generic/rwonce.h > > > index 8d0a6280e982..fb07fe8c6e45 100644 > > > --- a/include/asm-generic/rwonce.h > > > +++ b/include/asm-generic/rwonce.h > > > @@ -50,10 +50,12 @@ > > > __READ_ONCE(x); \ > > > }) > > > > > > +#ifndef __WRITE_ONCE > > > #define __WRITE_ONCE(x, val) \ > > > do { \ > > > *(volatile typeof(x) *)&(x) = (val); \ > > > } while (0) > > > +#endif > > > > > > #define WRITE_ONCE(x, val) \ > > > do { \ > > > -- > > > 2.36.1 > > > > > > > > -- > Best Regards > Guo Ren >
diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata index 1aa85a427ff3..c919cc3f1a3a 100644 --- a/arch/riscv/Kconfig.errata +++ b/arch/riscv/Kconfig.errata @@ -77,4 +77,23 @@ config ERRATA_THEAD_PMU If you don't know what to do here, say "Y". +config ERRATA_THEAD_WRITE_ONCE + bool "Apply T-Head WRITE_ONCE errata" + depends on ERRATA_THEAD + default y + help + The early version of T-Head C9xx cores has a store merge buffer + delay problem. The store merge buffer could improve the store queue + performance by merging multi-store requests, but when there are no + continued store requests, the prior single store request would be + waiting in the store queue for a long time. That would cause + significant problems for communication between multi-cores. Appending + a fence w.o could immediately flush the store merge buffer and let + other cores see the write result. + + This will apply the WRITE_ONCE errata to handle the non-standard + behavior via appending a fence w.o instruction for WRITE_ONCE(). + + If you don't know what to do here, say "Y". + endmenu # "CPU errata selection" diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index be84b14f0118..751eb5a7f614 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -69,6 +69,23 @@ static bool errata_probe_pmu(unsigned int stage, return true; } +static bool errata_probe_write_once(unsigned int stage, + unsigned long arch_id, unsigned long impid) +{ + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_WRITE_ONCE)) + return false; + + /* target-c9xx cores report arch_id and impid as 0 */ + if (arch_id != 0 || impid != 0) + return false; + + if (stage == RISCV_ALTERNATIVES_BOOT || + stage == RISCV_ALTERNATIVES_MODULE) + return true; + + return false; +} + static u32 thead_errata_probe(unsigned int stage, unsigned long archid, unsigned long impid) { @@ -83,6 +100,9 @@ static u32 thead_errata_probe(unsigned int stage, if (errata_probe_pmu(stage, archid, impid)) cpu_req_errata |= BIT(ERRATA_THEAD_PMU); + if (errata_probe_write_once(stage, archid, impid)) + cpu_req_errata |= BIT(ERRATA_THEAD_WRITE_ONCE); + return cpu_req_errata; } diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 712cab7adffe..fbb2b8d39321 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -11,19 +11,6 @@ #include <asm/hwcap.h> #include <asm/vendorid_list.h> -#ifdef CONFIG_ERRATA_SIFIVE -#define ERRATA_SIFIVE_CIP_453 0 -#define ERRATA_SIFIVE_CIP_1200 1 -#define ERRATA_SIFIVE_NUMBER 2 -#endif - -#ifdef CONFIG_ERRATA_THEAD -#define ERRATA_THEAD_PBMT 0 -#define ERRATA_THEAD_CMO 1 -#define ERRATA_THEAD_PMU 2 -#define ERRATA_THEAD_NUMBER 3 -#endif - #ifdef __ASSEMBLY__ #define ALT_INSN_FAULT(x) \ diff --git a/arch/riscv/include/asm/rwonce.h b/arch/riscv/include/asm/rwonce.h new file mode 100644 index 000000000000..be0b8864969d --- /dev/null +++ b/arch/riscv/include/asm/rwonce.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_RWONCE_H +#define __ASM_RWONCE_H + +#include <linux/compiler_types.h> +#include <asm/alternative-macros.h> +#include <asm/vendorid_list.h> + +#define __WRITE_ONCE(x, val) \ +do { \ + *(volatile typeof(x) *)&(x) = (val); \ + asm volatile(ALTERNATIVE( \ + __nops(1), \ + "fence w, o\n\t", \ + THEAD_VENDOR_ID, \ + ERRATA_THEAD_WRITE_ONCE, \ + CONFIG_ERRATA_THEAD_WRITE_ONCE) \ + : : : "memory"); \ +} while (0) + +#include <asm-generic/rwonce.h> + +#endif /* __ASM_RWONCE_H */ diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h index cb89af3f0704..73078cfe4029 100644 --- a/arch/riscv/include/asm/vendorid_list.h +++ b/arch/riscv/include/asm/vendorid_list.h @@ -8,4 +8,18 @@ #define SIFIVE_VENDOR_ID 0x489 #define THEAD_VENDOR_ID 0x5b7 +#ifdef CONFIG_ERRATA_SIFIVE +#define ERRATA_SIFIVE_CIP_453 0 +#define ERRATA_SIFIVE_CIP_1200 1 +#define ERRATA_SIFIVE_NUMBER 2 +#endif + +#ifdef CONFIG_ERRATA_THEAD +#define ERRATA_THEAD_PBMT 0 +#define ERRATA_THEAD_CMO 1 +#define ERRATA_THEAD_PMU 2 +#define ERRATA_THEAD_WRITE_ONCE 3 +#define ERRATA_THEAD_NUMBER 4 +#endif + #endif diff --git a/include/asm-generic/rwonce.h b/include/asm-generic/rwonce.h index 8d0a6280e982..fb07fe8c6e45 100644 --- a/include/asm-generic/rwonce.h +++ b/include/asm-generic/rwonce.h @@ -50,10 +50,12 @@ __READ_ONCE(x); \ }) +#ifndef __WRITE_ONCE #define __WRITE_ONCE(x, val) \ do { \ *(volatile typeof(x) *)&(x) = (val); \ } while (0) +#endif #define WRITE_ONCE(x, val) \ do { \