| Message ID | 20241216032253.685728-1-guoren@kernel.org |
|---|---|
| State | Superseded |
| Series | riscv: Implement smp_cond_load8/16() with Zawrs |
On Sun, Dec 15, 2024 at 10:22:53PM -0500, guoren@kernel.org wrote:
> From: Guo Ren <guoren@linux.alibaba.com>
>
> RISC-V code uses the queued spinlock implementation, which calls
> the macros smp_cond_load_acquire for one byte. So, complement the
> implementation of byte and halfword versions.
>
> Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> Signed-off-by: Guo Ren <guoren@kernel.org>
> ---
>  arch/riscv/include/asm/cmpxchg.h | 38 +++++++++++++++++++++++++++++---
>  1 file changed, 35 insertions(+), 3 deletions(-)
>
> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> index 4cadc56220fe..2bd42a11ff8f 100644
> --- a/arch/riscv/include/asm/cmpxchg.h
> +++ b/arch/riscv/include/asm/cmpxchg.h
> @@ -365,16 +365,48 @@ static __always_inline void __cmpwait(volatile void *ptr,
>  {
>  	unsigned long tmp;
>
> +	u32 *__ptr32b;
> +	ulong __s, __val, __mask;
> +
>  	asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
>  			     0, RISCV_ISA_EXT_ZAWRS, 1)
>  		 : : : : no_zawrs);
>
>  	switch (size) {
>  	case 1:
> -		fallthrough;
> +		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
> +		__s = ((ulong)(ptr) & 0x3) * BITS_PER_BYTE;
> +		__val = val << __s;
> +		__mask = 0xf << __s;

This mask should be 0xff and the mask below should be 0xffff.

> +
> +		asm volatile(
> +		"	lr.w	%0, %1\n"
> +		"	and	%0, %0, %3\n"
> +		"	xor	%0, %0, %2\n"
> +		"	bnez	%0, 1f\n"
> +		ZAWRS_WRS_NTO "\n"
> +		"1:"
> +		: "=&r" (tmp), "+A" (*(__ptr32b))
> +		: "r" (__val), "r" (__mask)
> +		: "memory");
> +		break;
>  	case 2:
> -		/* RISC-V doesn't have lr instructions on byte and half-word. */
> -		goto no_zawrs;
> +		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
> +		__s = ((ulong)(ptr) & 0x2) * BITS_PER_BYTE;
> +		__val = val << __s;
> +		__mask = 0xff << __s;
> +
> +		asm volatile(
> +		"	lr.w	%0, %1\n"
> +		"	and	%0, %0, %3\n"
> +		"	xor	%0, %0, %2\n"
> +		"	bnez	%0, 1f\n"
> +		ZAWRS_WRS_NTO "\n"
> +		"1:"
> +		: "=&r" (tmp), "+A" (*(__ptr32b))
> +		: "r" (__val), "r" (__mask)
> +		: "memory");
> +		break;
>  	case 4:
>  		asm volatile(
>  		"	lr.w	%0, %1\n"
> --
> 2.40.1
>

Thanks,
drew
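The masking bug drew points out is easy to demonstrate outside the kernel. Below is a minimal user-space C sketch (illustrative only: the local names are made up, and it mirrors just the patch's shift/mask arithmetic, not the lr.w/wrs.nto wait loop) showing that 0xff and 0xffff select the full byte and halfword inside the aligned 32-bit word, whereas 0xf would cover only half a byte:

    #include <stdint.h>
    #include <stdio.h>

    #define BITS_PER_BYTE 8

    int main(void)
    {
        /* Little-endian layout, as on RISC-V Linux: bytes aa bb cc dd. */
        uint32_t word = 0xddccbbaa;

        /* case 1: wait on the byte at offset 1 (value 0xbb). */
        uintptr_t p = (uintptr_t)&word + 1;
        unsigned long s = (p & 0x3) * BITS_PER_BYTE;  /* shift = 8 */
        unsigned long mask = 0xffUL << s;             /* 0xf would keep only 4 bits */
        unsigned long val = 0xbbUL << s;

        /* The asm waits (wrs.nto) while the masked word still equals val. */
        printf("byte matches:     %d\n", (word & mask) == val);      /* 1 */

        /* case 2: wait on the halfword at offset 2 (value 0xddcc). */
        p = (uintptr_t)&word + 2;
        s = (p & 0x2) * BITS_PER_BYTE;                /* shift = 16 */
        mask = 0xffffUL << s;                         /* 0xff would keep only 8 bits */
        val = 0xddccUL << s;
        printf("halfword matches: %d\n", (word & mask) == val);      /* 1 */

        return 0;
    }

With the narrow masks from the patch as posted, the comparison would match on only the low nibble (or low byte) of the watched location, so the hart could keep waiting after the value had in fact changed.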
On Mon, Dec 16, 2024 at 11:42 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Sun, Dec 15, 2024 at 10:22:53PM -0500, guoren@kernel.org wrote:
> > From: Guo Ren <guoren@linux.alibaba.com>
> >
> > RISC-V code uses the queued spinlock implementation, which calls
> > the macros smp_cond_load_acquire for one byte. So, complement the
> > implementation of byte and halfword versions.
> >
> > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > Signed-off-by: Guo Ren <guoren@kernel.org>
> > ---
> >  arch/riscv/include/asm/cmpxchg.h | 38 +++++++++++++++++++++++++++++---
> >  1 file changed, 35 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> > index 4cadc56220fe..2bd42a11ff8f 100644
> > --- a/arch/riscv/include/asm/cmpxchg.h
> > +++ b/arch/riscv/include/asm/cmpxchg.h
> > @@ -365,16 +365,48 @@ static __always_inline void __cmpwait(volatile void *ptr,
> >  {
> >  	unsigned long tmp;
> >
> > +	u32 *__ptr32b;
> > +	ulong __s, __val, __mask;
> > +
> >  	asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
> >  			     0, RISCV_ISA_EXT_ZAWRS, 1)
> >  		 : : : : no_zawrs);
> >
> >  	switch (size) {
> >  	case 1:
> > -		fallthrough;
> > +		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
> > +		__s = ((ulong)(ptr) & 0x3) * BITS_PER_BYTE;
> > +		__val = val << __s;
> > +		__mask = 0xf << __s;
>
> This mask should be 0xff and the mask below should be 0xffff.

Thx for catching it; it's hard to test it out. I will correct it in the
next version.

> > +
> > +		asm volatile(
> > +		"	lr.w	%0, %1\n"
> > +		"	and	%0, %0, %3\n"
> > +		"	xor	%0, %0, %2\n"
> > +		"	bnez	%0, 1f\n"
> > +		ZAWRS_WRS_NTO "\n"
> > +		"1:"
> > +		: "=&r" (tmp), "+A" (*(__ptr32b))
> > +		: "r" (__val), "r" (__mask)
> > +		: "memory");
> > +		break;
> >  	case 2:
> > -		/* RISC-V doesn't have lr instructions on byte and half-word. */
> > -		goto no_zawrs;
> > +		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
> > +		__s = ((ulong)(ptr) & 0x2) * BITS_PER_BYTE;
> > +		__val = val << __s;
> > +		__mask = 0xff << __s;
> > +
> > +		asm volatile(
> > +		"	lr.w	%0, %1\n"
> > +		"	and	%0, %0, %3\n"
> > +		"	xor	%0, %0, %2\n"
> > +		"	bnez	%0, 1f\n"
> > +		ZAWRS_WRS_NTO "\n"
> > +		"1:"
> > +		: "=&r" (tmp), "+A" (*(__ptr32b))
> > +		: "r" (__val), "r" (__mask)
> > +		: "memory");
> > +		break;
> >  	case 4:
> >  		asm volatile(
> >  		"	lr.w	%0, %1\n"
> > --
> > 2.40.1
> >
>
> Thanks,
> drew
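For reference, the two sub-word cases with the corrected masks would presumably look as follows in the next version (a sketch based only on drew's comment; the asm volatile bodies stay exactly as in the patch above):

	case 1:
		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
		__s = ((ulong)(ptr) & 0x3) * BITS_PER_BYTE;
		__val = val << __s;
		__mask = 0xff << __s;	/* full byte, was 0xf */
		/* asm volatile block as in the patch */
		break;
	case 2:
		__ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
		__s = ((ulong)(ptr) & 0x2) * BITS_PER_BYTE;
		__val = val << __s;
		__mask = 0xffff << __s;	/* full halfword, was 0xff */
		/* asm volatile block as in the patch */
		break;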