@@ -11,8 +11,27 @@
#include <asm/fence.h>
#include <asm/alternative.h>
-#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \
+#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \
+ swap_append, r, p, n) \
({ \
+ __label__ no_zabha, end; \
+ \
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \
+ asm goto(ALTERNATIVE("j %[no_zabha]", "nop", 0, \
+ RISCV_ISA_EXT_ZABHA, 1) \
+ : : : : no_zabha); \
+ \
+ __asm__ __volatile__ ( \
+ prepend \
+ " amoswap" swap_sfx " %0, %z2, %1\n" \
+ swap_append \
+ : "=&r" (r), "+A" (*(p)) \
+ : "rJ" (n) \
+ : "memory"); \
+ goto end; \
+ } \
+ \
+no_zabha:; \
u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
@@ -28,12 +47,14 @@
" or %1, %1, %z3\n" \
" sc.w" sc_sfx " %1, %1, %2\n" \
" bnez %1, 0b\n" \
- append \
+ sc_append \
: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
: "rJ" (__newx), "rJ" (~__mask) \
: "memory"); \
\
r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+ \
+end:; \
})
#define __arch_xchg(sfx, prepend, append, r, p, n) \
@@ -56,8 +77,13 @@
\
switch (sizeof(*__ptr)) { \
case 1: \
+ __arch_xchg_masked(sc_sfx, ".b" swap_sfx, \
+ prepend, sc_append, swap_append, \
+ __ret, __ptr, __new); \
+ break; \
case 2: \
- __arch_xchg_masked(sc_sfx, prepend, sc_append, \
+ __arch_xchg_masked(sc_sfx, ".h" swap_sfx, \
+ prepend, sc_append, swap_append, \
__ret, __ptr, __new); \
break; \
case 4: \
This adds runtime support for Zabha in xchg8/16() operations. Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> --- arch/riscv/include/asm/cmpxchg.h | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-)