Message ID | 20240717061957.140712-2-alexghiti@rivosinc.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Zacas/Zabha support and qspinlocks | expand |
On Wed, Jul 17, 2024 at 08:19:47AM GMT, Alexandre Ghiti wrote: > This adds runtime support for Zacas in cmpxchg operations. > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> > --- > arch/riscv/Kconfig | 17 +++++++++++++++++ > arch/riscv/Makefile | 3 +++ > arch/riscv/include/asm/cmpxchg.h | 26 +++++++++++++++++++++++--- > 3 files changed, 43 insertions(+), 3 deletions(-) > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > index 05ccba8ca33a..1caaedec88c7 100644 > --- a/arch/riscv/Kconfig > +++ b/arch/riscv/Kconfig > @@ -596,6 +596,23 @@ config RISCV_ISA_V_PREEMPTIVE > preemption. Enabling this config will result in higher memory > consumption due to the allocation of per-task's kernel Vector context. > > +config TOOLCHAIN_HAS_ZACAS > + bool > + default y > + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas) > + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas) > + depends on AS_HAS_OPTION_ARCH > + > +config RISCV_ISA_ZACAS > + bool "Zacas extension support for atomic CAS" > + depends on TOOLCHAIN_HAS_ZACAS > + default y > + help > + Enable the use of the Zacas ISA-extension to implement kernel atomic > + cmpxchg operations when it is detected at boot. > + > + If you don't know what to do here, say Y. > + > config TOOLCHAIN_HAS_ZBB > bool > default y > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile > index 06de9d365088..9fd13d7a9cc6 100644 > --- a/arch/riscv/Makefile > +++ b/arch/riscv/Makefile > @@ -85,6 +85,9 @@ endif > # Check if the toolchain supports Zihintpause extension > riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause > > +# Check if the toolchain supports Zacas > +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas > + > # Remove F,D,V from isa string for all. 
Keep extensions between "fd" and "v" by > # matching non-v and non-multi-letter extensions out with the filter ([^v_]*) > KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/') > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > index 808b4c78462e..5d38153e2f13 100644 > --- a/arch/riscv/include/asm/cmpxchg.h > +++ b/arch/riscv/include/asm/cmpxchg.h > @@ -9,6 +9,7 @@ > #include <linux/bug.h> > > #include <asm/fence.h> > +#include <asm/alternative.h> > > #define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \ > ({ \ > @@ -134,21 +135,40 @@ > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ > }) > > -#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \ > +#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \ I'd either not bother renaming sc_sfx or also rename it in _arch_cmpxchg. > ({ \ > + __label__ no_zacas, end; \ > register unsigned int __rc; \ > \ > + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \ > + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \ > + RISCV_ISA_EXT_ZACAS, 1) \ > + : : : : no_zacas); \ > + \ > + __asm__ __volatile__ ( \ > + prepend \ > + " amocas" sc_cas_sfx " %0, %z2, %1\n" \ > + append \ > + : "+&r" (r), "+A" (*(p)) \ > + : "rJ" (n) \ > + : "memory"); \ > + goto end; \ > + } \ > + \ > +no_zacas: \ > __asm__ __volatile__ ( \ > prepend \ > "0: lr" lr_sfx " %0, %2\n" \ > " bne %0, %z3, 1f\n" \ > - " sc" sc_sfx " %1, %z4, %2\n" \ > + " sc" sc_cas_sfx " %1, %z4, %2\n" \ > " bnez %1, 0b\n" \ > append \ > "1:\n" \ > : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \ > : "rJ" (co o), "rJ" (n) \ > : "memory"); \ > + \ > +end:; \ > }) > > #define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \ > @@ -156,7 +176,7 @@ > __typeof__(ptr) __ptr = (ptr); \ > __typeof__(*(__ptr)) __old = (old); \ > __typeof__(*(__ptr)) __new = (new); \ > - __typeof__(*(__ptr)) __ret; \ > + __typeof__(*(__ptr)) __ret = (old); \ Is this just to 
silence some compiler warnings? Can we point out whatever the reason is in the commit message? > \ > switch (sizeof(*__ptr)) { \ > case 1: \ > -- > 2.39.2 > Thanks, drew
Hi drew, On Wed, Jul 17, 2024 at 5:08 PM Andrew Jones <ajones@ventanamicro.com> wrote: > > On Wed, Jul 17, 2024 at 08:19:47AM GMT, Alexandre Ghiti wrote: > > This adds runtime support for Zacas in cmpxchg operations. > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> > > --- > > arch/riscv/Kconfig | 17 +++++++++++++++++ > > arch/riscv/Makefile | 3 +++ > > arch/riscv/include/asm/cmpxchg.h | 26 +++++++++++++++++++++++--- > > 3 files changed, 43 insertions(+), 3 deletions(-) > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > index 05ccba8ca33a..1caaedec88c7 100644 > > --- a/arch/riscv/Kconfig > > +++ b/arch/riscv/Kconfig > > @@ -596,6 +596,23 @@ config RISCV_ISA_V_PREEMPTIVE > > preemption. Enabling this config will result in higher memory > > consumption due to the allocation of per-task's kernel Vector context. > > > > +config TOOLCHAIN_HAS_ZACAS > > + bool > > + default y > > + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas) > > + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas) > > + depends on AS_HAS_OPTION_ARCH > > + > > +config RISCV_ISA_ZACAS > > + bool "Zacas extension support for atomic CAS" > > + depends on TOOLCHAIN_HAS_ZACAS > > + default y > > + help > > + Enable the use of the Zacas ISA-extension to implement kernel atomic > > + cmpxchg operations when it is detected at boot. > > + > > + If you don't know what to do here, say Y. > > + > > config TOOLCHAIN_HAS_ZBB > > bool > > default y > > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile > > index 06de9d365088..9fd13d7a9cc6 100644 > > --- a/arch/riscv/Makefile > > +++ b/arch/riscv/Makefile > > @@ -85,6 +85,9 @@ endif > > # Check if the toolchain supports Zihintpause extension > > riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause > > > > +# Check if the toolchain supports Zacas > > +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas > > + > > # Remove F,D,V from isa string for all. 
Keep extensions between "fd" and "v" by > > # matching non-v and non-multi-letter extensions out with the filter ([^v_]*) > > KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/') > > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > > index 808b4c78462e..5d38153e2f13 100644 > > --- a/arch/riscv/include/asm/cmpxchg.h > > +++ b/arch/riscv/include/asm/cmpxchg.h > > @@ -9,6 +9,7 @@ > > #include <linux/bug.h> > > > > #include <asm/fence.h> > > +#include <asm/alternative.h> > > > > #define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \ > > ({ \ > > @@ -134,21 +135,40 @@ > > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ > > }) > > > > -#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \ > > +#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \ > > I'd either not bother renaming sc_sfx or also rename it in _arch_cmpxchg. I'll rename both then. > > > ({ \ > > + __label__ no_zacas, end; \ > > register unsigned int __rc; \ > > \ > > + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \ > > + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \ > > + RISCV_ISA_EXT_ZACAS, 1) \ > > + : : : : no_zacas); \ > > + \ > > + __asm__ __volatile__ ( \ > > + prepend \ > > + " amocas" sc_cas_sfx " %0, %z2, %1\n" \ > > + append \ > > + : "+&r" (r), "+A" (*(p)) \ > > + : "rJ" (n) \ > > + : "memory"); \ > > + goto end; \ > > + } \ > > + \ > > +no_zacas: \ > > __asm__ __volatile__ ( \ > > prepend \ > > "0: lr" lr_sfx " %0, %2\n" \ > > " bne %0, %z3, 1f\n" \ > > - " sc" sc_sfx " %1, %z4, %2\n" \ > > + " sc" sc_cas_sfx " %1, %z4, %2\n" \ > > " bnez %1, 0b\n" \ > > append \ > > "1:\n" \ > > : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \ > > : "rJ" (co o), "rJ" (n) \ > > : "memory"); \ > > + \ > > +end:; \ > > }) > > > > #define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \ > > @@ -156,7 +176,7 @@ > > __typeof__(ptr) __ptr = (ptr); \ > > __typeof__(*(__ptr)) 
__old = (old); \ > > __typeof__(*(__ptr)) __new = (new); \ > > - __typeof__(*(__ptr)) __ret; \ > > + __typeof__(*(__ptr)) __ret = (old); \ > > Is this just to silence some compiler warnings? Can we point out > whatever the reason is in the commit message? CAS expects to find the old value in rd (__ret) to check against the current value in memory before actually swapping with the new value. But both you and Andrea were confused by this, I'll make it more explicit. > > > \ > > switch (sizeof(*__ptr)) { \ > > case 1: \ > > -- > > 2.39.2 > > > > Thanks, > drew Thanks, Alex
Hi Alex, On 2024-07-17 1:19 AM, Alexandre Ghiti wrote: > This adds runtime support for Zacas in cmpxchg operations. > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> > --- > arch/riscv/Kconfig | 17 +++++++++++++++++ > arch/riscv/Makefile | 3 +++ > arch/riscv/include/asm/cmpxchg.h | 26 +++++++++++++++++++++++--- > 3 files changed, 43 insertions(+), 3 deletions(-) > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > index 05ccba8ca33a..1caaedec88c7 100644 > --- a/arch/riscv/Kconfig > +++ b/arch/riscv/Kconfig > @@ -596,6 +596,23 @@ config RISCV_ISA_V_PREEMPTIVE > preemption. Enabling this config will result in higher memory > consumption due to the allocation of per-task's kernel Vector context. > > +config TOOLCHAIN_HAS_ZACAS > + bool > + default y > + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas) > + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas) > + depends on AS_HAS_OPTION_ARCH > + > +config RISCV_ISA_ZACAS > + bool "Zacas extension support for atomic CAS" > + depends on TOOLCHAIN_HAS_ZACAS > + default y > + help > + Enable the use of the Zacas ISA-extension to implement kernel atomic > + cmpxchg operations when it is detected at boot. > + > + If you don't know what to do here, say Y. > + > config TOOLCHAIN_HAS_ZBB > bool > default y > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile > index 06de9d365088..9fd13d7a9cc6 100644 > --- a/arch/riscv/Makefile > +++ b/arch/riscv/Makefile > @@ -85,6 +85,9 @@ endif > # Check if the toolchain supports Zihintpause extension > riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause > > +# Check if the toolchain supports Zacas > +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas > + > # Remove F,D,V from isa string for all. 
Keep extensions between "fd" and "v" by > # matching non-v and non-multi-letter extensions out with the filter ([^v_]*) > KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/') > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > index 808b4c78462e..5d38153e2f13 100644 > --- a/arch/riscv/include/asm/cmpxchg.h > +++ b/arch/riscv/include/asm/cmpxchg.h > @@ -9,6 +9,7 @@ > #include <linux/bug.h> > > #include <asm/fence.h> > +#include <asm/alternative.h> > > #define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \ > ({ \ > @@ -134,21 +135,40 @@ > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ > }) > > -#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \ > +#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \ > ({ \ > + __label__ no_zacas, end; \ > register unsigned int __rc; \ > \ > + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \ > + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \ > + RISCV_ISA_EXT_ZACAS, 1) \ > + : : : : no_zacas); \ > + \ > + __asm__ __volatile__ ( \ > + prepend \ > + " amocas" sc_cas_sfx " %0, %z2, %1\n" \ > + append \ > + : "+&r" (r), "+A" (*(p)) \ > + : "rJ" (n) \ > + : "memory"); \ > + goto end; \ > + } \ > + \ > +no_zacas: \ > __asm__ __volatile__ ( \ > prepend \ > "0: lr" lr_sfx " %0, %2\n" \ > " bne %0, %z3, 1f\n" \ > - " sc" sc_sfx " %1, %z4, %2\n" \ > + " sc" sc_cas_sfx " %1, %z4, %2\n" \ > " bnez %1, 0b\n" \ > append \ This would probably be a good place to use inline ALTERNATIVE instead of an asm goto. It saves overall code size, and a jump in the non-Zacas case, at the cost of 3 nops in the Zacas case. (And all the nops can go after the amocas, where they will likely be hidden by the amocas latency.) 
Regards, Samuel > "1:\n" \ > : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \ > : "rJ" (co o), "rJ" (n) \ > : "memory"); \ > + \ > +end:; \ > }) > > #define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \ > @@ -156,7 +176,7 @@ > __typeof__(ptr) __ptr = (ptr); \ > __typeof__(*(__ptr)) __old = (old); \ > __typeof__(*(__ptr)) __new = (new); \ > - __typeof__(*(__ptr)) __ret; \ > + __typeof__(*(__ptr)) __ret = (old); \ > \ > switch (sizeof(*__ptr)) { \ > case 1: \
On Fri, Jul 19, 2024 at 2:45 AM Samuel Holland <samuel.holland@sifive.com> wrote: > > Hi Alex, > > On 2024-07-17 1:19 AM, Alexandre Ghiti wrote: > > This adds runtime support for Zacas in cmpxchg operations. > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> > > --- > > arch/riscv/Kconfig | 17 +++++++++++++++++ > > arch/riscv/Makefile | 3 +++ > > arch/riscv/include/asm/cmpxchg.h | 26 +++++++++++++++++++++++--- > > 3 files changed, 43 insertions(+), 3 deletions(-) > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > index 05ccba8ca33a..1caaedec88c7 100644 > > --- a/arch/riscv/Kconfig > > +++ b/arch/riscv/Kconfig > > @@ -596,6 +596,23 @@ config RISCV_ISA_V_PREEMPTIVE > > preemption. Enabling this config will result in higher memory > > consumption due to the allocation of per-task's kernel Vector context. > > > > +config TOOLCHAIN_HAS_ZACAS > > + bool > > + default y > > + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas) > > + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas) > > + depends on AS_HAS_OPTION_ARCH > > + > > +config RISCV_ISA_ZACAS > > + bool "Zacas extension support for atomic CAS" > > + depends on TOOLCHAIN_HAS_ZACAS > > + default y > > + help > > + Enable the use of the Zacas ISA-extension to implement kernel atomic > > + cmpxchg operations when it is detected at boot. > > + > > + If you don't know what to do here, say Y. > > + > > config TOOLCHAIN_HAS_ZBB > > bool > > default y > > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile > > index 06de9d365088..9fd13d7a9cc6 100644 > > --- a/arch/riscv/Makefile > > +++ b/arch/riscv/Makefile > > @@ -85,6 +85,9 @@ endif > > # Check if the toolchain supports Zihintpause extension > > riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause > > > > +# Check if the toolchain supports Zacas > > +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas > > + > > # Remove F,D,V from isa string for all. 
Keep extensions between "fd" and "v" by > > # matching non-v and non-multi-letter extensions out with the filter ([^v_]*) > > KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/') > > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > > index 808b4c78462e..5d38153e2f13 100644 > > --- a/arch/riscv/include/asm/cmpxchg.h > > +++ b/arch/riscv/include/asm/cmpxchg.h > > @@ -9,6 +9,7 @@ > > #include <linux/bug.h> > > > > #include <asm/fence.h> > > +#include <asm/alternative.h> > > > > #define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \ > > ({ \ > > @@ -134,21 +135,40 @@ > > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ > > }) > > > > -#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \ > > +#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \ > > ({ \ > > + __label__ no_zacas, end; \ > > register unsigned int __rc; \ > > \ > > + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \ > > + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \ > > + RISCV_ISA_EXT_ZACAS, 1) \ > > + : : : : no_zacas); \ > > + \ > > + __asm__ __volatile__ ( \ > > + prepend \ > > + " amocas" sc_cas_sfx " %0, %z2, %1\n" \ > > + append \ > > + : "+&r" (r), "+A" (*(p)) \ > > + : "rJ" (n) \ > > + : "memory"); \ > > + goto end; \ > > + } \ > > + \ > > +no_zacas: \ > > __asm__ __volatile__ ( \ > > prepend \ > > "0: lr" lr_sfx " %0, %2\n" \ > > " bne %0, %z3, 1f\n" \ > > - " sc" sc_sfx " %1, %z4, %2\n" \ > > + " sc" sc_cas_sfx " %1, %z4, %2\n" \ > > " bnez %1, 0b\n" \ > > append \ > > This would probably be a good place to use inline ALTERNATIVE instead of an asm > goto. It saves overall code size, and a jump in the non-Zacas case, at the cost > of 3 nops in the Zacas case. (And all the nops can go after the amocas, where > they will likely be hidden by the amocas latency.) That's what Conor proposed indeed. I have just given it a try, but it does not work. 
The number of instructions in the zacas asm inline is different in the case of the fully-ordered version so I cannot set a unique number of nops. I could pass this information from arch_cmpxchg() down to __arch_cmpxchg() but those macros are already complex enough so I'd rather not add another parameter. This suggestion unfortunately cannot be applied to __arch_cmpxchg_masked(), nor __arch_xchg_masked(). So unless you and Conor really insist, I'll drop the idea! Thanks, Alex > > Regards, > Samuel > > > "1:\n" \ > > : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \ > > : "rJ" (co o), "rJ" (n) \ > > : "memory"); \ > > + \ > > +end:; \ > > }) > > > > #define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \ > > @@ -156,7 +176,7 @@ > > __typeof__(ptr) __ptr = (ptr); \ > > __typeof__(*(__ptr)) __old = (old); \ > > __typeof__(*(__ptr)) __new = (new); \ > > - __typeof__(*(__ptr)) __ret; \ > > + __typeof__(*(__ptr)) __ret = (old); \ > > \ > > switch (sizeof(*__ptr)) { \ > > case 1: \ >
On Fri, Jul 19, 2024 at 1:48 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > On Fri, Jul 19, 2024 at 2:45 AM Samuel Holland > <samuel.holland@sifive.com> wrote: > > > > Hi Alex, > > > > On 2024-07-17 1:19 AM, Alexandre Ghiti wrote: > > > This adds runtime support for Zacas in cmpxchg operations. > > > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> > > > --- > > > arch/riscv/Kconfig | 17 +++++++++++++++++ > > > arch/riscv/Makefile | 3 +++ > > > arch/riscv/include/asm/cmpxchg.h | 26 +++++++++++++++++++++++--- > > > 3 files changed, 43 insertions(+), 3 deletions(-) > > > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > > index 05ccba8ca33a..1caaedec88c7 100644 > > > --- a/arch/riscv/Kconfig > > > +++ b/arch/riscv/Kconfig > > > @@ -596,6 +596,23 @@ config RISCV_ISA_V_PREEMPTIVE > > > preemption. Enabling this config will result in higher memory > > > consumption due to the allocation of per-task's kernel Vector context. > > > > > > +config TOOLCHAIN_HAS_ZACAS > > > + bool > > > + default y > > > + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas) > > > + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas) > > > + depends on AS_HAS_OPTION_ARCH > > > + > > > +config RISCV_ISA_ZACAS > > > + bool "Zacas extension support for atomic CAS" > > > + depends on TOOLCHAIN_HAS_ZACAS > > > + default y > > > + help > > > + Enable the use of the Zacas ISA-extension to implement kernel atomic > > > + cmpxchg operations when it is detected at boot. > > > + > > > + If you don't know what to do here, say Y. 
> > > + > > > config TOOLCHAIN_HAS_ZBB > > > bool > > > default y > > > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile > > > index 06de9d365088..9fd13d7a9cc6 100644 > > > --- a/arch/riscv/Makefile > > > +++ b/arch/riscv/Makefile > > > @@ -85,6 +85,9 @@ endif > > > # Check if the toolchain supports Zihintpause extension > > > riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause > > > > > > +# Check if the toolchain supports Zacas > > > +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas > > > + > > > # Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by > > > # matching non-v and non-multi-letter extensions out with the filter ([^v_]*) > > > KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/') > > > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > > > index 808b4c78462e..5d38153e2f13 100644 > > > --- a/arch/riscv/include/asm/cmpxchg.h > > > +++ b/arch/riscv/include/asm/cmpxchg.h > > > @@ -9,6 +9,7 @@ > > > #include <linux/bug.h> > > > > > > #include <asm/fence.h> > > > +#include <asm/alternative.h> > > > > > > #define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \ > > > ({ \ > > > @@ -134,21 +135,40 @@ > > > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ > > > }) > > > > > > -#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \ > > > +#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \ > > > ({ \ > > > + __label__ no_zacas, end; \ > > > register unsigned int __rc; \ > > > \ > > > + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \ > > > + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \ > > > + RISCV_ISA_EXT_ZACAS, 1) \ > > > + : : : : no_zacas); \ > > > + \ > > > + __asm__ __volatile__ ( \ > > > + prepend \ > > > + " amocas" sc_cas_sfx " %0, %z2, %1\n" \ > > > + append \ > > > + : "+&r" (r), "+A" (*(p)) \ > > > + : "rJ" (n) \ > > > + : "memory"); \ > 
> > + goto end; \ > > > + } \ > > > + \ > > > +no_zacas: \ > > > __asm__ __volatile__ ( \ > > > prepend \ > > > "0: lr" lr_sfx " %0, %2\n" \ > > > " bne %0, %z3, 1f\n" \ > > > - " sc" sc_sfx " %1, %z4, %2\n" \ > > > + " sc" sc_cas_sfx " %1, %z4, %2\n" \ > > > " bnez %1, 0b\n" \ > > > append \ > > > > This would probably be a good place to use inline ALTERNATIVE instead of an asm > > goto. It saves overall code size, and a jump in the non-Zacas case, at the cost > > of 3 nops in the Zacas case. (And all the nops can go after the amocas, where > > they will likely be hidden by the amocas latency.) > > That's what Conor proposed indeed. > > I have just given it a try, but it does not work. The number of > instructions in the zacas asm inline is different in the case of the > fully-ordered version so I cannot set a unique number of nops. I could > pass this information from arch_cmpxchg() down to __arch_cmpxchg() but > those macros are already complex enough so I'd rather not add another > parameter. > > This suggestion unfortunately cannot be applied to > __arch_cmpxchg_masked(), nor __arch_xchg_masked(). > > So unless you and Conor really insist, I'll drop the idea! Or I can pass a nop when the full barrier is not needed, and it works! I'll probably keep this version then since it avoids the introduction of new macros or the use of a static branch to circumvent the llvm bug. > > Thanks, > > Alex > > > > > > Regards, > > Samuel > > > > > "1:\n" \ > > > : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \ > > > : "rJ" (co o), "rJ" (n) \ > > > : "memory"); \ > > > + \ > > > +end:; \ > > > }) > > > > > > #define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \ > > > @@ -156,7 +176,7 @@ > > > __typeof__(ptr) __ptr = (ptr); \ > > > __typeof__(*(__ptr)) __old = (old); \ > > > __typeof__(*(__ptr)) __new = (new); \ > > > - __typeof__(*(__ptr)) __ret; \ > > > + __typeof__(*(__ptr)) __ret = (old); \ > > > \ > > > switch (sizeof(*__ptr)) { \ > > > case 1: \ > >
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 05ccba8ca33a..1caaedec88c7 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -596,6 +596,23 @@ config RISCV_ISA_V_PREEMPTIVE preemption. Enabling this config will result in higher memory consumption due to the allocation of per-task's kernel Vector context. +config TOOLCHAIN_HAS_ZACAS + bool + default y + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas) + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas) + depends on AS_HAS_OPTION_ARCH + +config RISCV_ISA_ZACAS + bool "Zacas extension support for atomic CAS" + depends on TOOLCHAIN_HAS_ZACAS + default y + help + Enable the use of the Zacas ISA-extension to implement kernel atomic + cmpxchg operations when it is detected at boot. + + If you don't know what to do here, say Y. + config TOOLCHAIN_HAS_ZBB bool default y diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 06de9d365088..9fd13d7a9cc6 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -85,6 +85,9 @@ endif # Check if the toolchain supports Zihintpause extension riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause +# Check if the toolchain supports Zacas +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas + # Remove F,D,V from isa string for all. 
Keep extensions between "fd" and "v" by # matching non-v and non-multi-letter extensions out with the filter ([^v_]*) KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/') diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 808b4c78462e..5d38153e2f13 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -9,6 +9,7 @@ #include <linux/bug.h> #include <asm/fence.h> +#include <asm/alternative.h> #define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \ ({ \ @@ -134,21 +135,40 @@ r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ }) -#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \ +#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \ ({ \ + __label__ no_zacas, end; \ register unsigned int __rc; \ \ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \ + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \ + RISCV_ISA_EXT_ZACAS, 1) \ + : : : : no_zacas); \ + \ + __asm__ __volatile__ ( \ + prepend \ + " amocas" sc_cas_sfx " %0, %z2, %1\n" \ + append \ + : "+&r" (r), "+A" (*(p)) \ + : "rJ" (n) \ + : "memory"); \ + goto end; \ + } \ + \ +no_zacas: \ __asm__ __volatile__ ( \ prepend \ "0: lr" lr_sfx " %0, %2\n" \ " bne %0, %z3, 1f\n" \ - " sc" sc_sfx " %1, %z4, %2\n" \ + " sc" sc_cas_sfx " %1, %z4, %2\n" \ " bnez %1, 0b\n" \ append \ "1:\n" \ : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \ : "rJ" (co o), "rJ" (n) \ : "memory"); \ + \ +end:; \ }) #define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \ @@ -156,7 +176,7 @@ __typeof__(ptr) __ptr = (ptr); \ __typeof__(*(__ptr)) __old = (old); \ __typeof__(*(__ptr)) __new = (new); \ - __typeof__(*(__ptr)) __ret; \ + __typeof__(*(__ptr)) __ret = (old); \ \ switch (sizeof(*__ptr)) { \ case 1: \
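This adds runtime support for Zacas in cmpxchg operations. Note that __ret is now initialized to the old value: amocas expects to find the expected (old) value in rd (__ret), which it checks against the current value in memory before actually swapping in the new value. Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> --- arch/riscv/Kconfig | 17 +++++++++++++++++ arch/riscv/Makefile | 3 +++ arch/riscv/include/asm/cmpxchg.h | 26 +++++++++++++++++++++++--- 3 files changed, 43 insertions(+), 3 deletions(-)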