Message ID | 20240717061957.140712-6-alexghiti@rivosinc.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Zacas/Zabha support and qspinlocks | expand |
On Wed, Jul 17, 2024 at 08:19:51AM GMT, Alexandre Ghiti wrote: > Now that Zacas is supported in the kernel, let's use the double word > atomic version of amocas to improve the SLUB allocator. > > Note that we have to select fixed registers, otherwise gcc fails to pick > even registers and then produces a reserved encoding which fails to > assemble. Oh, that's quite unfortunate... I guess we should try to get some new RISC-V inline assembly register constraints added to support register pairs. > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> > --- > arch/riscv/Kconfig | 1 + > arch/riscv/include/asm/cmpxchg.h | 39 ++++++++++++++++++++++++++++++++ > 2 files changed, 40 insertions(+) > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > index d3b0f92f92da..0bbaec0444d0 100644 > --- a/arch/riscv/Kconfig > +++ b/arch/riscv/Kconfig > @@ -104,6 +104,7 @@ config RISCV > select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO > select HARDIRQS_SW_RESEND > select HAS_IOPORT if MMU > + select HAVE_ALIGNED_STRUCT_PAGE > select HAVE_ARCH_AUDITSYSCALL > select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP > select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > index 97b24da38897..608d98522557 100644 > --- a/arch/riscv/include/asm/cmpxchg.h > +++ b/arch/riscv/include/asm/cmpxchg.h > @@ -289,4 +289,43 @@ end:; \ > arch_cmpxchg_release((ptr), (o), (n)); \ > }) > > +#ifdef CONFIG_RISCV_ISA_ZACAS This is also 64-bit only, so needs a CONFIG_64BIT check too. > + > +#define system_has_cmpxchg128() \ > + riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS) nit: let's let this stick out since we have 100 chars > + > +union __u128_halves { > + u128 full; > + struct { > + u64 low, high; Should we consider big endian too? > + }; > +}; > + > +#define __arch_cmpxchg128(p, o, n, cas_sfx) \ > +({ \ > + __typeof__(*(p)) __o = (o); \ > + union __u128_halves __hn = { .full = (n) }; \ > + union __u128_halves __ho = { .full = (__o) }; \ > + register unsigned long x6 asm ("x6") = __hn.low; \ > + register unsigned long x7 asm ("x7") = __hn.high; \ > + register unsigned long x28 asm ("x28") = __ho.low; \ > + register unsigned long x29 asm ("x29") = __ho.high; \ Can we use t1,t2,t3,t4 rather than the x names? > + \ > + __asm__ __volatile__ ( \ > + " amocas.q" cas_sfx " %0, %z3, %2" \ > + : "+&r" (x28), "+&r" (x29), "+A" (*(p)) \ > + : "rJ" (x6), "rJ" (x7) \ > + : "memory"); \ > + \ > + ((u128)x29 << 64) | x28; \ > +}) > + > +#define arch_cmpxchg128(ptr, o, n) \ > + __arch_cmpxchg128((ptr), (o), (n), ".aqrl") > + > +#define arch_cmpxchg128_local(ptr, o, n) \ > + __arch_cmpxchg128((ptr), (o), (n), "") > + > +#endif /* CONFIG_RISCV_ISA_ZACAS */ > + > #endif /* _ASM_RISCV_CMPXCHG_H */ > -- > 2.39.2 Thanks, drew
Hi Drew, On Wed, Jul 17, 2024 at 10:34 PM Andrew Jones <ajones@ventanamicro.com> wrote: > > On Wed, Jul 17, 2024 at 08:19:51AM GMT, Alexandre Ghiti wrote: > > Now that Zacas is supported in the kernel, let's use the double word > > atomic version of amocas to improve the SLUB allocator. > > > > Note that we have to select fixed registers, otherwise gcc fails to pick > > even registers and then produces a reserved encoding which fails to > > assemble. > > Oh, that's quite unfortunate... I guess we should try to get some new > RISC-V inline assembly register constraints added to support register > pairs. I internally informed the compilers people, I'll check their progress. > > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> > > --- > > arch/riscv/Kconfig | 1 + > > arch/riscv/include/asm/cmpxchg.h | 39 ++++++++++++++++++++++++++++++++ > > 2 files changed, 40 insertions(+) > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > index d3b0f92f92da..0bbaec0444d0 100644 > > --- a/arch/riscv/Kconfig > > +++ b/arch/riscv/Kconfig > > @@ -104,6 +104,7 @@ config RISCV > > select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO > > select HARDIRQS_SW_RESEND > > select HAS_IOPORT if MMU > > + select HAVE_ALIGNED_STRUCT_PAGE > > select HAVE_ARCH_AUDITSYSCALL > > select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP > > select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT > > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > > index 97b24da38897..608d98522557 100644 > > --- a/arch/riscv/include/asm/cmpxchg.h > > +++ b/arch/riscv/include/asm/cmpxchg.h > > @@ -289,4 +289,43 @@ end:; \ > > arch_cmpxchg_release((ptr), (o), (n)); \ > > }) > > > > +#ifdef CONFIG_RISCV_ISA_ZACAS > > This is also 64-bit only, so needs a CONFIG_64BIT check too. Yep, thanks > > > + > > +#define system_has_cmpxchg128() \ > > + riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS) > > nit: let's let this stick out since we have 100 chars Ok > > > + > > +union __u128_halves { > > + u128 full; > > + struct { > > + u64 low, high; > > Should we consider big endian too? Should we care about big endian? We don't deal with big endian anywhere in our kernel right now. > > > + }; > > +}; > > + > > +#define __arch_cmpxchg128(p, o, n, cas_sfx) \ > > +({ \ > > + __typeof__(*(p)) __o = (o); \ > > + union __u128_halves __hn = { .full = (n) }; \ > > + union __u128_halves __ho = { .full = (__o) }; \ > > + register unsigned long x6 asm ("x6") = __hn.low; \ > > + register unsigned long x7 asm ("x7") = __hn.high; \ > > + register unsigned long x28 asm ("x28") = __ho.low; \ > > + register unsigned long x29 asm ("x29") = __ho.high; \ > > Can we use t1,t2,t3,t4 rather than the x names? We can, I did not because it was a bit misleading in the sense that amocas expects an *even* register and using the tX registers, we'll pass an *odd* register (which actually is even but still). Anyway, I'll change that, I don't like the xX notation. Thanks for the review, Alex > > > + \ > > + __asm__ __volatile__ ( \ > > + " amocas.q" cas_sfx " %0, %z3, %2" \ > > + : "+&r" (x28), "+&r" (x29), "+A" (*(p)) \ > > + : "rJ" (x6), "rJ" (x7) \ > > + : "memory"); \ > > + \ > > + ((u128)x29 << 64) | x28; \ > > +}) > > + > > +#define arch_cmpxchg128(ptr, o, n) \ > > + __arch_cmpxchg128((ptr), (o), (n), ".aqrl") > > + > > +#define arch_cmpxchg128_local(ptr, o, n) \ > > + __arch_cmpxchg128((ptr), (o), (n), "") > > + > > +#endif /* CONFIG_RISCV_ISA_ZACAS */ > > + > > #endif /* _ASM_RISCV_CMPXCHG_H */ > > -- > > 2.39.2 > > Thanks, > drew
On Thu, Jul 18, 2024 at 09:48:42AM +0200, Alexandre Ghiti wrote: > On Wed, Jul 17, 2024 at 10:34 PM Andrew Jones <ajones@ventanamicro.com> wrote: > > On Wed, Jul 17, 2024 at 08:19:51AM GMT, Alexandre Ghiti wrote: > > > + > > > +union __u128_halves { > > > + u128 full; > > > + struct { > > > + u64 low, high; > > > > Should we consider big endian too? > > Should we care about big endian? We don't deal with big endian > anywhere in our kernel right now. There's one or two places I think that we do actually have some conditional stuff for BE. The Zbb string routines I believe is one such place, and maybe there are one or two others. In general I'm not of the opinion that it is worth adding complexity for BE until there's linux-capable hardware that supports it (so not QEMU or people's toy implementations), unless it's something that userspace is able to see.
On Thu, Jul 18, 2024, at 10:33, Conor Dooley wrote: > On Thu, Jul 18, 2024 at 09:48:42AM +0200, Alexandre Ghiti wrote: >> On Wed, Jul 17, 2024 at 10:34 PM Andrew Jones <ajones@ventanamicro.com> wrote: >> > On Wed, Jul 17, 2024 at 08:19:51AM GMT, Alexandre Ghiti wrote: >> > > + >> > > +union __u128_halves { >> > > + u128 full; >> > > + struct { >> > > + u64 low, high; >> > >> > Should we consider big endian too? >> >> Should we care about big endian? We don't deal with big endian >> anywhere in our kernel right now. > > There's one or two places I think that we do actually have some > conditional stuff for BE. The Zbb string routines I believe is one such > place, and maybe there are one or two others. In general I'm not of the > opinion that it is worth adding complexity for BE until there's > linux-capable hardware that supports it (so not QEMU or people's toy > implementations), unless it's something that userspace is able to see. I don't think you want to go there at all: maintaining an extra user space ABI (or two if you add 32-bit BE as well) has a huge long-term cost, and there is pretty much zero benefit for a BE ABI these days. Adding it to arm64 turned out to be a mistake. We did have a handful of users in the first year, and it technically still works, but I don't think there are any users left after they managed to fix their nonportable legacy userspace from that was ported from big-endian mips or powerpc. Arnd
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index d3b0f92f92da..0bbaec0444d0 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -104,6 +104,7 @@ config RISCV select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO select HARDIRQS_SW_RESEND select HAS_IOPORT if MMU + select HAVE_ALIGNED_STRUCT_PAGE select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 97b24da38897..608d98522557 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -289,4 +289,43 @@ end:; \ arch_cmpxchg_release((ptr), (o), (n)); \ }) +#ifdef CONFIG_RISCV_ISA_ZACAS + +#define system_has_cmpxchg128() \ + riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS) + +union __u128_halves { + u128 full; + struct { + u64 low, high; + }; +}; + +#define __arch_cmpxchg128(p, o, n, cas_sfx) \ +({ \ + __typeof__(*(p)) __o = (o); \ + union __u128_halves __hn = { .full = (n) }; \ + union __u128_halves __ho = { .full = (__o) }; \ + register unsigned long x6 asm ("x6") = __hn.low; \ + register unsigned long x7 asm ("x7") = __hn.high; \ + register unsigned long x28 asm ("x28") = __ho.low; \ + register unsigned long x29 asm ("x29") = __ho.high; \ + \ + __asm__ __volatile__ ( \ + " amocas.q" cas_sfx " %0, %z3, %2" \ + : "+&r" (x28), "+&r" (x29), "+A" (*(p)) \ + : "rJ" (x6), "rJ" (x7) \ + : "memory"); \ + \ + ((u128)x29 << 64) | x28; \ +}) + +#define arch_cmpxchg128(ptr, o, n) \ + __arch_cmpxchg128((ptr), (o), (n), ".aqrl") + +#define arch_cmpxchg128_local(ptr, o, n) \ + __arch_cmpxchg128((ptr), (o), (n), "") + +#endif /* CONFIG_RISCV_ISA_ZACAS */ + #endif /* _ASM_RISCV_CMPXCHG_H */
Now that Zacas is supported in the kernel, let's use the double word atomic version of amocas to improve the SLUB allocator. Note that we have to select fixed registers, otherwise gcc fails to pick even registers and then produces a reserved encoding which fails to assemble. Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/cmpxchg.h | 39 ++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+)