Message ID | 20240626130347.520750-4-alexghiti@rivosinc.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Zacas/Zabha support and qspinlocks | expand |
> -#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n) \ > +#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n) \ > ({ \ > + __label__ no_zacas, zabha, end; \ > + \ > + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \ > + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \ > + RISCV_ISA_EXT_ZACAS, 1) \ > + : : : : no_zacas); \ > + asm goto(ALTERNATIVE("nop", "j %[zabha]", 0, \ > + RISCV_ISA_EXT_ZABHA, 1) \ > + : : : : zabha); \ > + } \ > + \ > +no_zacas:; \ > u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ > ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ > ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ > @@ -133,6 +145,19 @@ > : "memory"); \ > \ > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ > + goto end; \ > + \ > +zabha: \ > + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \ > + __asm__ __volatile__ ( \ > + prepend \ > + " amocas" cas_sfx " %0, %z2, %1\n" \ > + append \ > + : "+&r" (r), "+A" (*(p)) \ > + : "rJ" (n) \ > + : "memory"); \ > + } \ > +end:; \ > }) I admit that I found this all quite difficult to read; IIUC, this is missing an IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) check. How about adding such a check under the zabha: label (replacing/in place of the second IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) check) and moving the corresponding asm goto statement there, perhaps as follows? (on top of this patch) Also, the patch presents the first occurrence of RISCV_ISA_EXT_ZABHA; perhaps worth moving the hwcap/cpufeature changes from patch #6 here? 
Andrea diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index b9a3fdcec919..3c913afec150 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -110,15 +110,12 @@ __label__ no_zacas, zabha, end; \ \ if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \ - asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \ - RISCV_ISA_EXT_ZACAS, 1) \ - : : : : no_zacas); \ asm goto(ALTERNATIVE("nop", "j %[zabha]", 0, \ RISCV_ISA_EXT_ZABHA, 1) \ : : : : zabha); \ } \ \ -no_zacas:; \ +no_zacas: \ u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ @@ -148,16 +145,20 @@ no_zacas:; \ goto end; \ \ zabha: \ - if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \ - __asm__ __volatile__ ( \ - prepend \ - " amocas" cas_sfx " %0, %z2, %1\n" \ - append \ - : "+&r" (r), "+A" (*(p)) \ - : "rJ" (n) \ - : "memory"); \ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \ + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \ + RISCV_ISA_EXT_ZACAS, 1) \ + : : : : no_zacas); \ } \ -end:; \ + \ + __asm__ __volatile__ ( \ + prepend \ + " amocas" cas_sfx " %0, %z2, %1\n" \ + append \ + : "+&r" (r), "+A" (*(p)) \ + : "rJ" (n) \ + : "memory"); \ +end: \ }) #define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
> I admit that I found this all quite difficult to read; IIUC, this is > missing an IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) check. How about adding > such a check under the zabha: label (replacing/in place of the second > IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) check) and moving the corresponding > asm goto statement there, perhaps as follows? (on top of this patch) [...] > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > index b9a3fdcec919..3c913afec150 100644 > --- a/arch/riscv/include/asm/cmpxchg.h > +++ b/arch/riscv/include/asm/cmpxchg.h > @@ -110,15 +110,12 @@ > __label__ no_zacas, zabha, end; \ > \ > if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \ > - asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \ > - RISCV_ISA_EXT_ZACAS, 1) \ > - : : : : no_zacas); \ > asm goto(ALTERNATIVE("nop", "j %[zabha]", 0, \ > RISCV_ISA_EXT_ZABHA, 1) \ > : : : : zabha); \ > } \ > \ > -no_zacas:; \ > +no_zacas: \ > u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ > ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ > ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ > @@ -148,16 +145,20 @@ no_zacas:; \ > goto end; \ > \ > zabha: \ > - if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \ > - __asm__ __volatile__ ( \ > - prepend \ > - " amocas" cas_sfx " %0, %z2, %1\n" \ > - append \ > - : "+&r" (r), "+A" (*(p)) \ > - : "rJ" (n) \ > - : "memory"); \ > + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \ > + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \ > + RISCV_ISA_EXT_ZACAS, 1) \ > + : : : : no_zacas); \ > } \ > -end:; \ > + \ > + __asm__ __volatile__ ( \ > + prepend \ > + " amocas" cas_sfx " %0, %z2, %1\n" \ > + append \ > + : "+&r" (r), "+A" (*(p)) \ > + : "rJ" (n) \ > + : "memory"); \ > +end: \ > }) > > #define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \ Unfortunately, this diff won't work e.g. when ZABHA is supported and detected at boot while ZACAS is not supported; more thinking required... Andrea
On 27/06/2024 13:53, Andrea Parri wrote: >> -#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n) \ >> +#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n) \ >> ({ \ >> + __label__ no_zacas, zabha, end; \ >> + \ >> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \ >> + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \ >> + RISCV_ISA_EXT_ZACAS, 1) \ >> + : : : : no_zacas); \ >> + asm goto(ALTERNATIVE("nop", "j %[zabha]", 0, \ >> + RISCV_ISA_EXT_ZABHA, 1) \ >> + : : : : zabha); \ >> + } \ >> + \ >> +no_zacas:; \ >> u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ >> ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ >> ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ >> @@ -133,6 +145,19 @@ >> : "memory"); \ >> \ >> r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ >> + goto end; \ >> + \ >> +zabha: \ >> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \ >> + __asm__ __volatile__ ( \ >> + prepend \ >> + " amocas" cas_sfx " %0, %z2, %1\n" \ >> + append \ >> + : "+&r" (r), "+A" (*(p)) \ >> + : "rJ" (n) \ >> + : "memory"); \ >> + } \ >> +end:; \ >> }) > I admit that I found this all quite difficult to read; IIUC, this is > missing an IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) check. I'm not sure we need the zacas check here, since we could use a toolchain that supports zabha but not zacas, run this on a zabha/zacas platform and it would work. > How about adding > such a check under the zabha: label (replacing/in place of the second > IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) check) and moving the corresponding > asm goto statement there, perhaps as follows? (on top of this patch) If that makes things clearer for you, sure, I can do that. > > Also, the patch presents the first occurrence of RISCV_ISA_EXT_ZABHA; > perhaps worth moving the hwcap/cpufeature changes from patch #6 here? 
> > Andrea > > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > index b9a3fdcec919..3c913afec150 100644 > --- a/arch/riscv/include/asm/cmpxchg.h > +++ b/arch/riscv/include/asm/cmpxchg.h > @@ -110,15 +110,12 @@ > __label__ no_zacas, zabha, end; \ > \ > if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \ > - asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \ > - RISCV_ISA_EXT_ZACAS, 1) \ > - : : : : no_zacas); \ > asm goto(ALTERNATIVE("nop", "j %[zabha]", 0, \ > RISCV_ISA_EXT_ZABHA, 1) \ > : : : : zabha); \ > } \ > \ > -no_zacas:; \ > +no_zacas: \ > u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ > ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ > ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ > @@ -148,16 +145,20 @@ no_zacas:; \ > goto end; \ > \ > zabha: \ > - if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \ But we need to keep this check, otherwise it would fail to compile on toolchains without Zabha support. > - __asm__ __volatile__ ( \ > - prepend \ > - " amocas" cas_sfx " %0, %z2, %1\n" \ > - append \ > - : "+&r" (r), "+A" (*(p)) \ > - : "rJ" (n) \ > - : "memory"); \ > + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \ > + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \ > + RISCV_ISA_EXT_ZACAS, 1) \ > + : : : : no_zacas); \ > } \ > -end:; \ > + \ > + __asm__ __volatile__ ( \ > + prepend \ > + " amocas" cas_sfx " %0, %z2, %1\n" \ > + append \ > + : "+&r" (r), "+A" (*(p)) \ > + : "rJ" (n) \ > + : "memory"); \ > +end: \ > }) > > #define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \ > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
> > I admit that I found this all quite difficult to read; IIUC, this is > > missing an IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) check. > > I'm not sure we need the zacas check here, since we could use a toolchain > that supports zabha but not zacas, run this on a zabha/zacas platform and it > would work. One specific set-up I was concerned about is as follows: 1) hardware implements both zabha and zacas 2) toolchain supports both zabha and zacas 3) CONFIG_RISCV_ISA_ZABHA=y and CONFIG_RISCV_ISA_ZACAS=n Since CONFIG_RISCV_ISA_ZABHA=y, the first asm goto will get executed and, since the hardware implements zacas, that will result in a nop. Then the second asm goto will get executed and, since the hardware implements zabha, it will result in the j zabha. In conclusion, the amocas instruction following the zabha: label will get executed, thus violating (the semantics of) CONFIG_RISCV_ISA_ZACAS=n. IIUC, the diff I've posted previously in this thread shared a similar limitation/bug. Andrea
Hi Andrea, On Wed, Jul 10, 2024 at 1:51 AM Andrea Parri <parri.andrea@gmail.com> wrote: > > > > I admit that I found this all quite difficult to read; IIUC, this is > > > missing an IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) check. > > > > I'm not sure we need the zacas check here, since we could use a toolchain > > that supports zabha but not zacas, run this on a zabha/zacas platform and it > > would work. > > One specific set-up I was concerned about is as follows: > > 1) hardware implements both zabha and zacas > 2) toolchain supports both zabha and zacas > 3) CONFIG_RISCV_ISA_ZABHA=y and CONFIG_RISCV_ISA_ZACAS=n > > Since CONFIG_RISCV_ISA_ZABHA=y, the first asm goto will get executed > and, since the hardware implements zacas, that will result in a nop. > Then the second asm goto will get executed and, since the hardware > implements zabha, it will result in the j zabha. In conclusion, the > amocas instruction following the zabha: label will get executed, thus > violating (the semantics of) CONFIG_RISCV_ISA_ZACAS=n. IIUC, the diff > I've posted previously in this thread shared a similar limitation/bug. So you mean that when disabling Zacas, we should actually disable *all* the CAS instructions, even the Zabha ones. It makes sense and allows for a single way to disable the CAS instructions but keeping the other atomic operations. I'll fix that and add a comment. Thanks, Alex > > Andrea
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 1caaedec88c7..d3b0f92f92da 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -596,6 +596,23 @@ config RISCV_ISA_V_PREEMPTIVE preemption. Enabling this config will result in higher memory consumption due to the allocation of per-task's kernel Vector context. +config TOOLCHAIN_HAS_ZABHA + bool + default y + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zabha) + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zabha) + depends on AS_HAS_OPTION_ARCH + +config RISCV_ISA_ZABHA + bool "Zabha extension support for atomic byte/halfword operations" + depends on TOOLCHAIN_HAS_ZABHA + default y + help + Enable the use of the Zabha ISA-extension to implement kernel + byte/halfword atomic memory operations when it is detected at boot. + + If you don't know what to do here, say Y. + config TOOLCHAIN_HAS_ZACAS bool default y diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 9fd13d7a9cc6..78dcaaeebf4e 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -88,6 +88,9 @@ riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause # Check if the toolchain supports Zacas riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas +# Check if the toolchain supports Zabha +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZABHA) := $(riscv-march-y)_zabha + # Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by # matching non-v and non-multi-letter extensions out with the filter ([^v_]*) KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/') diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index a58a2141c6d3..b9a3fdcec919 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -105,8 +105,20 @@ * indicated by comparing RETURN with OLD. 
*/ -#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n) \ +#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n) \ ({ \ + __label__ no_zacas, zabha, end; \ + \ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \ + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \ + RISCV_ISA_EXT_ZACAS, 1) \ + : : : : no_zacas); \ + asm goto(ALTERNATIVE("nop", "j %[zabha]", 0, \ + RISCV_ISA_EXT_ZABHA, 1) \ + : : : : zabha); \ + } \ + \ +no_zacas:; \ u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ @@ -133,6 +145,19 @@ : "memory"); \ \ r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ + goto end; \ + \ +zabha: \ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \ + __asm__ __volatile__ ( \ + prepend \ + " amocas" cas_sfx " %0, %z2, %1\n" \ + append \ + : "+&r" (r), "+A" (*(p)) \ + : "rJ" (n) \ + : "memory"); \ + } \ +end:; \ }) #define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \ @@ -181,8 +206,13 @@ end:; \ \ switch (sizeof(*__ptr)) { \ case 1: \ + __arch_cmpxchg_masked(sc_sfx, ".b" sc_sfx, \ + prepend, append, \ + __ret, __ptr, __old, __new); \ + break; \ case 2: \ - __arch_cmpxchg_masked(sc_sfx, prepend, append, \ + __arch_cmpxchg_masked(sc_sfx, ".h" sc_sfx, \ + prepend, append, \ __ret, __ptr, __old, __new); \ break; \ case 4: \
This adds runtime support for Zabha in cmpxchg8/16() operations. Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> --- arch/riscv/Kconfig | 17 ++++++++++++++++ arch/riscv/Makefile | 3 +++ arch/riscv/include/asm/cmpxchg.h | 34 ++++++++++++++++++++++++++++++-- 3 files changed, 52 insertions(+), 2 deletions(-)