Message ID | 20230910082911.3378782-4-guoren@kernel.org (mailing list archive)
---|---
State | New, archived
Series | riscv: Add Native/Paravirt qspinlock support
On Sun, Sep 10, 2023 at 04:28:57AM -0400, guoren@kernel.org wrote: > From: Guo Ren <guoren@linux.alibaba.com> > > Cache-block prefetch instructions are HINTs to the hardware to > indicate that software intends to perform a particular type of > memory access in the near future. Enable ARCH_HAS_PREFETCHW and > improve the arch_xchg for qspinlock xchg_tail. > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > Signed-off-by: Guo Ren <guoren@kernel.org> > --- > arch/riscv/Kconfig | 15 +++++++++++++++ > arch/riscv/include/asm/cmpxchg.h | 4 +++- > arch/riscv/include/asm/hwcap.h | 1 + > arch/riscv/include/asm/insn-def.h | 5 +++++ > arch/riscv/include/asm/processor.h | 13 +++++++++++++ > arch/riscv/kernel/cpufeature.c | 1 + > 6 files changed, 38 insertions(+), 1 deletion(-) > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > index e9ae6fa232c3..2c346fe169c1 100644 > --- a/arch/riscv/Kconfig > +++ b/arch/riscv/Kconfig > @@ -617,6 +617,21 @@ config RISCV_ISA_ZICBOZ > > If you don't know what to do here, say Y. > > +config RISCV_ISA_ZICBOP > + bool "Zicbop extension support for cache block prefetch" > + depends on MMU > + depends on RISCV_ALTERNATIVE > + default y > + help > + Adds support to dynamically detect the presence of the ZICBOP > + extension (Cache Block Prefetch Operations) and enable its > + usage. > + > + The Zicbop extension can be used to prefetch cache block for > + read/write/instruction fetch. > + > + If you don't know what to do here, say Y. > + > config TOOLCHAIN_HAS_ZIHINTPAUSE > bool > default y > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > index 702725727671..56eff7a9d2d2 100644 > --- a/arch/riscv/include/asm/cmpxchg.h > +++ b/arch/riscv/include/asm/cmpxchg.h > @@ -11,6 +11,7 @@ > > #include <asm/barrier.h> > #include <asm/fence.h> > +#include <asm/processor.h> > > #define __arch_xchg_masked(prepend, append, r, p, n) \ > ({ \ > @@ -25,6 +26,7 @@ > \ > __asm__ __volatile__ ( \ > prepend \ > + PREFETCHW_ASM(%5) \ > "0: lr.w %0, %2\n" \ > " and %1, %0, %z4\n" \ > " or %1, %1, %z3\n" \ > @@ -32,7 +34,7 @@ > " bnez %1, 0b\n" \ > append \ > : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ > - : "rJ" (__newx), "rJ" (~__mask) \ > + : "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \ > : "memory"); \ > \ > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ > diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h > index b7b58258f6c7..78b7b8b53778 100644 > --- a/arch/riscv/include/asm/hwcap.h > +++ b/arch/riscv/include/asm/hwcap.h > @@ -58,6 +58,7 @@ > #define RISCV_ISA_EXT_ZICSR 40 > #define RISCV_ISA_EXT_ZIFENCEI 41 > #define RISCV_ISA_EXT_ZIHPM 42 > +#define RISCV_ISA_EXT_ZICBOP 43 > > #define RISCV_ISA_EXT_MAX 64 > > diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h > index 6960beb75f32..dc590d331894 100644 > --- a/arch/riscv/include/asm/insn-def.h > +++ b/arch/riscv/include/asm/insn-def.h > @@ -134,6 +134,7 @@ > > #define RV_OPCODE_MISC_MEM RV_OPCODE(15) > #define RV_OPCODE_SYSTEM RV_OPCODE(115) > +#define RV_OPCODE_PREFETCH RV_OPCODE(19) > > #define HFENCE_VVMA(vaddr, asid) \ > INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(17), \ > @@ -196,4 +197,8 @@ > INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ > RS1(base), SIMM12(4)) > > +#define CBO_prefetchw(base) \ > + INSN_R(OPCODE_PREFETCH, FUNC3(6), FUNC7(0), \ > + RD(x0), RS1(base), RS2(x0)) > + I understand that here you create the instruction via bitfield, following the ISA, and this enables using instructions not available on the 
toolchain. It took me some time to find the document with this instruction, so please add this to the commit msg: https://github.com/riscv/riscv-CMOs/blob/master/specifications/cmobase-v1.0.pdf Page 23. IIUC, the instruction is "prefetch.w". Maybe I am missing something, but in the document the rs2 field (PREFETCH.W) contains a 0x3, while the above looks to have a 0 instead. rs2 field = 0x0 would be a prefetch.i (instruction prefetch) instead. Is the above correct, or am I missing something? Thanks! Leo > #endif /* __ASM_INSN_DEF_H */ > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h > index de9da852f78d..7ad3a24212e8 100644 > --- a/arch/riscv/include/asm/processor.h > +++ b/arch/riscv/include/asm/processor.h > @@ -12,6 +12,8 @@ > #include <vdso/processor.h> > > #include <asm/ptrace.h> > +#include <asm/insn-def.h> > +#include <asm/hwcap.h> > > #ifdef CONFIG_64BIT > #define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) > @@ -103,6 +105,17 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, > #define KSTK_EIP(tsk) (ulong)(task_pt_regs(tsk)->epc) > #define KSTK_ESP(tsk) (ulong)(task_pt_regs(tsk)->sp) > > +#define ARCH_HAS_PREFETCHW > +#define PREFETCHW_ASM(base) ALTERNATIVE(__nops(1), \ > + CBO_prefetchw(base), \ > + 0, \ > + RISCV_ISA_EXT_ZICBOP, \ > + CONFIG_RISCV_ISA_ZICBOP) > +static inline void prefetchw(const void *ptr) > +{ > + asm volatile(PREFETCHW_ASM(%0) > + : : "r" (ptr) : "memory"); > +} > > /* Do necessary setup to start up a newly executed thread. */ > extern void start_thread(struct pt_regs *regs, > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c > index ef7b4fd9e876..e0b897db0b97 100644 > --- a/arch/riscv/kernel/cpufeature.c > +++ b/arch/riscv/kernel/cpufeature.c > @@ -159,6 +159,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { > __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), > __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), > __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), > + __RISCV_ISA_EXT_DATA(zicbop, RISCV_ISA_EXT_ZICBOP), > __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), > __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), > __RISCV_ISA_EXT_DATA(zifencei, RISCV_ISA_EXT_ZIFENCEI), > -- > 2.36.1 >
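For reference, the three Zicbop prefetch hints discussed above share one layout and differ only in the value placed in the rs2 position: 0 selects prefetch.i, 1 selects prefetch.r and 3 selects prefetch.w (cmobase-v1.0, page 23). The snippet below is an illustrative sketch, not part of the patch; the helper name prefetch_insn is made up for the example, and it simply assembles the raw 32-bit word from those fields.

	/*
	 * Illustrative only. Builds the 32-bit encoding of the Zicbop
	 * prefetch hints: OP-IMM major opcode (0x13), funct3 = 6 (ORI slot),
	 * imm[4:0] = 0, operation selector in the rs2 position
	 * (0 = prefetch.i, 1 = prefetch.r, 3 = prefetch.w).
	 */
	#include <stdint.h>
	#include <stdio.h>

	static uint32_t prefetch_insn(uint32_t op, uint32_t base_reg, int32_t offset)
	{
		uint32_t imm11_5 = ((uint32_t)offset >> 5) & 0x7f; /* offset is a multiple of 32 */

		return (imm11_5 << 25) | (op << 20) | (base_reg << 15) |
		       (6u << 12) | (0u << 7) | 0x13;
	}

	int main(void)
	{
		/* prefetch.w 0(a0): a0 is x10, selector 3 */
		printf("prefetch.w 0(a0) = 0x%08x\n", prefetch_insn(3, 10, 0));
		/* the same word with selector 0 would be prefetch.i, as Leo points out */
		printf("prefetch.i 0(a0) = 0x%08x\n", prefetch_insn(0, 10, 0));
		return 0;
	}

With selector 0 instead of 3, the patch as posted would therefore emit an instruction-prefetch hint rather than the intended write prefetch.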
On Sun, Sep 10, 2023 at 04:28:57AM -0400, guoren@kernel.org wrote: > From: Guo Ren <guoren@linux.alibaba.com> > > Cache-block prefetch instructions are HINTs to the hardware to > indicate that software intends to perform a particular type of > memory access in the near future. Enable ARCH_HAS_PREFETCHW and > improve the arch_xchg for qspinlock xchg_tail. > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > Signed-off-by: Guo Ren <guoren@kernel.org> > --- > arch/riscv/Kconfig | 15 +++++++++++++++ > arch/riscv/include/asm/cmpxchg.h | 4 +++- > arch/riscv/include/asm/hwcap.h | 1 + > arch/riscv/include/asm/insn-def.h | 5 +++++ > arch/riscv/include/asm/processor.h | 13 +++++++++++++ > arch/riscv/kernel/cpufeature.c | 1 + > 6 files changed, 38 insertions(+), 1 deletion(-) > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > index e9ae6fa232c3..2c346fe169c1 100644 > --- a/arch/riscv/Kconfig > +++ b/arch/riscv/Kconfig > @@ -617,6 +617,21 @@ config RISCV_ISA_ZICBOZ > > If you don't know what to do here, say Y. > > +config RISCV_ISA_ZICBOP > + bool "Zicbop extension support for cache block prefetch" > + depends on MMU > + depends on RISCV_ALTERNATIVE > + default y > + help > + Adds support to dynamically detect the presence of the ZICBOP > + extension (Cache Block Prefetch Operations) and enable its > + usage. > + > + The Zicbop extension can be used to prefetch cache block for > + read/write/instruction fetch. > + > + If you don't know what to do here, say Y. > + > config TOOLCHAIN_HAS_ZIHINTPAUSE > bool > default y > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > index 702725727671..56eff7a9d2d2 100644 > --- a/arch/riscv/include/asm/cmpxchg.h > +++ b/arch/riscv/include/asm/cmpxchg.h > @@ -11,6 +11,7 @@ > > #include <asm/barrier.h> > #include <asm/fence.h> > +#include <asm/processor.h> > > #define __arch_xchg_masked(prepend, append, r, p, n) \ > ({ \ > @@ -25,6 +26,7 @@ > \ > __asm__ __volatile__ ( \ > prepend \ > + PREFETCHW_ASM(%5) \ > "0: lr.w %0, %2\n" \ > " and %1, %0, %z4\n" \ > " or %1, %1, %z3\n" \ > @@ -32,7 +34,7 @@ > " bnez %1, 0b\n" \ > append \ > : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ > - : "rJ" (__newx), "rJ" (~__mask) \ > + : "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \ > : "memory"); \ > \ > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ > diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h > index b7b58258f6c7..78b7b8b53778 100644 > --- a/arch/riscv/include/asm/hwcap.h > +++ b/arch/riscv/include/asm/hwcap.h > @@ -58,6 +58,7 @@ > #define RISCV_ISA_EXT_ZICSR 40 > #define RISCV_ISA_EXT_ZIFENCEI 41 > #define RISCV_ISA_EXT_ZIHPM 42 > +#define RISCV_ISA_EXT_ZICBOP 43 > > #define RISCV_ISA_EXT_MAX 64 > > diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h > index 6960beb75f32..dc590d331894 100644 > --- a/arch/riscv/include/asm/insn-def.h > +++ b/arch/riscv/include/asm/insn-def.h > @@ -134,6 +134,7 @@ > > #define RV_OPCODE_MISC_MEM RV_OPCODE(15) > #define RV_OPCODE_SYSTEM RV_OPCODE(115) > +#define RV_OPCODE_PREFETCH RV_OPCODE(19) This should be named RV_OPCODE_OP_IMM and be placed in numerical order with the others, i.e. above SYSTEM. > > #define HFENCE_VVMA(vaddr, asid) \ > INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(17), \ > @@ -196,4 +197,8 @@ > INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ > RS1(base), SIMM12(4)) > > +#define CBO_prefetchw(base) \ Please name this 'PREFETCH_w' and it should take an immediate parameter, even if we intend to pass 0 for it. 
> + INSN_R(OPCODE_PREFETCH, FUNC3(6), FUNC7(0), \ > + RD(x0), RS1(base), RS2(x0)) prefetch.w is not an R-type instruction, it's an S-type. While the bit shifts are the same, the names are different. We need to add S-type names while defining this instruction. Then, this define would be #define PREFETCH_w(base, imm) \ INSN_S(OPCODE_OP_IMM, FUNC3(6), IMM_11_5(imm), __IMM_4_0(0), \ RS1(base), __RS2(3)) When the assembler as insn_r I hope it will validate that (imm & 0xfe0) == imm > + > #endif /* __ASM_INSN_DEF_H */ > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h > index de9da852f78d..7ad3a24212e8 100644 > --- a/arch/riscv/include/asm/processor.h > +++ b/arch/riscv/include/asm/processor.h > @@ -12,6 +12,8 @@ > #include <vdso/processor.h> > > #include <asm/ptrace.h> > +#include <asm/insn-def.h> > +#include <asm/hwcap.h> > > #ifdef CONFIG_64BIT > #define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) > @@ -103,6 +105,17 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, > #define KSTK_EIP(tsk) (ulong)(task_pt_regs(tsk)->epc) > #define KSTK_ESP(tsk) (ulong)(task_pt_regs(tsk)->sp) > > +#define ARCH_HAS_PREFETCHW > +#define PREFETCHW_ASM(base) ALTERNATIVE(__nops(1), \ > + CBO_prefetchw(base), \ > + 0, \ > + RISCV_ISA_EXT_ZICBOP, \ > + CONFIG_RISCV_ISA_ZICBOP) > +static inline void prefetchw(const void *ptr) > +{ > + asm volatile(PREFETCHW_ASM(%0) > + : : "r" (ptr) : "memory"); > +} > > /* Do necessary setup to start up a newly executed thread. */ > extern void start_thread(struct pt_regs *regs, > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c > index ef7b4fd9e876..e0b897db0b97 100644 > --- a/arch/riscv/kernel/cpufeature.c > +++ b/arch/riscv/kernel/cpufeature.c > @@ -159,6 +159,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { > __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), > __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), > __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), > + __RISCV_ISA_EXT_DATA(zicbop, RISCV_ISA_EXT_ZICBOP), zicbop should be above zicboz (extensions alphabetical within their category). > __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), > __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), > __RISCV_ISA_EXT_DATA(zifencei, RISCV_ISA_EXT_ZIFENCEI), > -- > 2.36.1 > Thanks, drew
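The S-type field helpers drew refers to do not exist in insn-def.h at this point in the series. As a rough sketch of the layout they would have to encode (the names below are illustrative; the real header composes instructions through assembler macros, so the final form differs):

	/*
	 * Sketch of the RISC-V S-type field positions. Compared with R-type,
	 * imm[11:5] occupies the func7 slot and imm[4:0] occupies the rd slot,
	 * while opcode/funct3/rs1/rs2 keep their positions.
	 */
	#define RV_S_OPCODE(v)    ((v)  & 0x7f)          /* bits  6:0  */
	#define RV_S_IMM_4_0(v)   (((v) & 0x1f) << 7)    /* bits 11:7  */
	#define RV_S_FUNC3(v)     (((v) & 0x07) << 12)   /* bits 14:12 */
	#define RV_S_RS1(v)       (((v) & 0x1f) << 15)   /* bits 19:15 */
	#define RV_S_RS2(v)       (((v) & 0x1f) << 20)   /* bits 24:20 */
	#define RV_S_IMM_11_5(v)  (((v) & 0x7f) << 25)   /* bits 31:25 */

Because the bit positions coincide with the R-type fields, the posted INSN_R encoding happens to produce the right bits, but naming the fields as S-type keeps the definition honest and robust against later changes.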
On Sun, Sep 10, 2023 at 04:28:57AM -0400, guoren@kernel.org wrote: > From: Guo Ren <guoren@linux.alibaba.com> > > Cache-block prefetch instructions are HINTs to the hardware to > indicate that software intends to perform a particular type of > memory access in the near future. Enable ARCH_HAS_PREFETCHW and > improve the arch_xchg for qspinlock xchg_tail. > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > Signed-off-by: Guo Ren <guoren@kernel.org> > --- > arch/riscv/Kconfig | 15 +++++++++++++++ > arch/riscv/include/asm/cmpxchg.h | 4 +++- > arch/riscv/include/asm/hwcap.h | 1 + > arch/riscv/include/asm/insn-def.h | 5 +++++ > arch/riscv/include/asm/processor.h | 13 +++++++++++++ > arch/riscv/kernel/cpufeature.c | 1 + > 6 files changed, 38 insertions(+), 1 deletion(-) > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > index e9ae6fa232c3..2c346fe169c1 100644 > --- a/arch/riscv/Kconfig > +++ b/arch/riscv/Kconfig > @@ -617,6 +617,21 @@ config RISCV_ISA_ZICBOZ > > If you don't know what to do here, say Y. > > +config RISCV_ISA_ZICBOP Even if we're not concerned with looping over blocks yet, I think we should introduce zicbop block size DT parsing at the same time we bring zicbop support to the kernel (it's just more copy+paste from zicbom and zicboz). It's a bit annoying that the CMO spec doesn't state that block sizes should be the same for m/z/p. And, the fact that m/z/p are all separate extensions leads us to needing to parse block sizes for all three, despite the fact that in practice they'll probably be the same. Thanks, drew
On Thu, Sep 14, 2023 at 04:25:53PM +0200, Andrew Jones wrote: > On Sun, Sep 10, 2023 at 04:28:57AM -0400, guoren@kernel.org wrote: > > From: Guo Ren <guoren@linux.alibaba.com> > > > > Cache-block prefetch instructions are HINTs to the hardware to > > indicate that software intends to perform a particular type of > > memory access in the near future. Enable ARCH_HAS_PREFETCHW and > > improve the arch_xchg for qspinlock xchg_tail. > > > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > > Signed-off-by: Guo Ren <guoren@kernel.org> > > --- > > arch/riscv/Kconfig | 15 +++++++++++++++ > > arch/riscv/include/asm/cmpxchg.h | 4 +++- > > arch/riscv/include/asm/hwcap.h | 1 + > > arch/riscv/include/asm/insn-def.h | 5 +++++ > > arch/riscv/include/asm/processor.h | 13 +++++++++++++ > > arch/riscv/kernel/cpufeature.c | 1 + > > 6 files changed, 38 insertions(+), 1 deletion(-) > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > index e9ae6fa232c3..2c346fe169c1 100644 > > --- a/arch/riscv/Kconfig > > +++ b/arch/riscv/Kconfig > > @@ -617,6 +617,21 @@ config RISCV_ISA_ZICBOZ > > > > If you don't know what to do here, say Y. > > > > +config RISCV_ISA_ZICBOP > > Even if we're not concerned with looping over blocks yet, I think we > should introduce zicbop block size DT parsing at the same time we bring > zicbop support to the kernel (it's just more copy+paste from zicbom and > zicboz). It's a bit annoying that the CMO spec doesn't state that block > sizes should be the same for m/z/p. And, the fact that m/z/p are all > separate extensions leads us to needing to parse block sizes for all > three, despite the fact that in practice they'll probably be the same. Although, I saw on a different mailing list that Andrei Warkentin interpreted section 2.7 "Software Discovery" of the spec, which states """ The initial set of CMO extensions requires the following information to be discovered by software: * The size of the cache block for management and prefetch instructions * The size of the cache block for zero instructions * CBIE support at each privilege level Other general cache characteristics may also be specified in the discovery mechanism. """ as management and prefetch having the same block size and only zero potentially having a different size. That looks like a reasonable interpretation to me, too. So, we could maybe proceed with assuming we can use zicbom_block_size for prefetch, for now. If a platform comes along that interpreted the spec differently, requiring prefetch block size to be specified separately, then we'll cross that bridge when we get there. Thanks, drew
On Thu, Sep 14, 2023 at 03:47:59PM +0200, Andrew Jones wrote: > On Sun, Sep 10, 2023 at 04:28:57AM -0400, guoren@kernel.org wrote: > > From: Guo Ren <guoren@linux.alibaba.com> > > > > Cache-block prefetch instructions are HINTs to the hardware to > > indicate that software intends to perform a particular type of > > memory access in the near future. Enable ARCH_HAS_PREFETCHW and > > improve the arch_xchg for qspinlock xchg_tail. > > > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > > Signed-off-by: Guo Ren <guoren@kernel.org> > > --- > > arch/riscv/Kconfig | 15 +++++++++++++++ > > arch/riscv/include/asm/cmpxchg.h | 4 +++- > > arch/riscv/include/asm/hwcap.h | 1 + > > arch/riscv/include/asm/insn-def.h | 5 +++++ > > arch/riscv/include/asm/processor.h | 13 +++++++++++++ > > arch/riscv/kernel/cpufeature.c | 1 + > > 6 files changed, 38 insertions(+), 1 deletion(-) > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > index e9ae6fa232c3..2c346fe169c1 100644 > > --- a/arch/riscv/Kconfig > > +++ b/arch/riscv/Kconfig > > @@ -617,6 +617,21 @@ config RISCV_ISA_ZICBOZ > > > > If you don't know what to do here, say Y. > > > > +config RISCV_ISA_ZICBOP > > + bool "Zicbop extension support for cache block prefetch" > > + depends on MMU > > + depends on RISCV_ALTERNATIVE > > + default y > > + help > > + Adds support to dynamically detect the presence of the ZICBOP > > + extension (Cache Block Prefetch Operations) and enable its > > + usage. > > + > > + The Zicbop extension can be used to prefetch cache block for > > + read/write/instruction fetch. > > + > > + If you don't know what to do here, say Y. > > + > > config TOOLCHAIN_HAS_ZIHINTPAUSE > > bool > > default y > > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > > index 702725727671..56eff7a9d2d2 100644 > > --- a/arch/riscv/include/asm/cmpxchg.h > > +++ b/arch/riscv/include/asm/cmpxchg.h > > @@ -11,6 +11,7 @@ > > > > #include <asm/barrier.h> > > #include <asm/fence.h> > > +#include <asm/processor.h> > > > > #define __arch_xchg_masked(prepend, append, r, p, n) \ > > ({ \ > > @@ -25,6 +26,7 @@ > > \ > > __asm__ __volatile__ ( \ > > prepend \ > > + PREFETCHW_ASM(%5) \ > > "0: lr.w %0, %2\n" \ > > " and %1, %0, %z4\n" \ > > " or %1, %1, %z3\n" \ > > @@ -32,7 +34,7 @@ > > " bnez %1, 0b\n" \ > > append \ > > : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ > > - : "rJ" (__newx), "rJ" (~__mask) \ > > + : "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \ > > : "memory"); \ > > \ > > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ > > diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h > > index b7b58258f6c7..78b7b8b53778 100644 > > --- a/arch/riscv/include/asm/hwcap.h > > +++ b/arch/riscv/include/asm/hwcap.h > > @@ -58,6 +58,7 @@ > > #define RISCV_ISA_EXT_ZICSR 40 > > #define RISCV_ISA_EXT_ZIFENCEI 41 > > #define RISCV_ISA_EXT_ZIHPM 42 > > +#define RISCV_ISA_EXT_ZICBOP 43 > > > > #define RISCV_ISA_EXT_MAX 64 > > > > diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h > > index 6960beb75f32..dc590d331894 100644 > > --- a/arch/riscv/include/asm/insn-def.h > > +++ b/arch/riscv/include/asm/insn-def.h > > @@ -134,6 +134,7 @@ > > > > #define RV_OPCODE_MISC_MEM RV_OPCODE(15) > > #define RV_OPCODE_SYSTEM RV_OPCODE(115) > > +#define RV_OPCODE_PREFETCH RV_OPCODE(19) > > This should be named RV_OPCODE_OP_IMM and be placed in > numerical order with the others, i.e. above SYSTEM. 
> > > > > #define HFENCE_VVMA(vaddr, asid) \ > > INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(17), \ > > @@ -196,4 +197,8 @@ > > INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ > > RS1(base), SIMM12(4)) > > > > +#define CBO_prefetchw(base) \ > > Please name this 'PREFETCH_w' and it should take an immediate parameter, > even if we intend to pass 0 for it. It makes sense. The mnemonic in the previously mentioned documentation is: prefetch.w offset(base) So yeah, makes sense to have both offset and base as parameters for CBO_prefetchw (or PREFETCH_w, I have no strong preference). > > > + INSN_R(OPCODE_PREFETCH, FUNC3(6), FUNC7(0), \ > > + RD(x0), RS1(base), RS2(x0)) > > prefetch.w is not an R-type instruction, it's an S-type. While the bit > shifts are the same, the names are different. We need to add S-type > names while defining this instruction. That is correct, it is supposed to look like a store instruction (S-type), even though documentation don't explicitly state that. Even though it works fine with the R-type definition, code documentation would be wrong, and future changes could break it. > Then, this define would be > > #define PREFETCH_w(base, imm) \ > INSN_S(OPCODE_OP_IMM, FUNC3(6), IMM_11_5(imm), __IMM_4_0(0), \ > RS1(base), __RS2(3)) s/OPCODE_OP_IMM/OPCODE_PREFETCH 0x4 vs 0x13 RS2 == 0x3 is correct (PREFETCH.W instead of PREFETCH.I) So IIUC, it should be: INSN_S(OPCODE_PREFETCH, FUNC3(6), IMM_11_5(imm), __IMM_4_0(0), \ RS1(base), __RS2(3) Thanks, Leo > > When the assembler as insn_r I hope it will validate that > (imm & 0xfe0) == imm > > > + > > #endif /* __ASM_INSN_DEF_H */ > > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h > > index de9da852f78d..7ad3a24212e8 100644 > > --- a/arch/riscv/include/asm/processor.h > > +++ b/arch/riscv/include/asm/processor.h > > @@ -12,6 +12,8 @@ > > #include <vdso/processor.h> > > > > #include <asm/ptrace.h> > > +#include <asm/insn-def.h> > > +#include <asm/hwcap.h> > > > > #ifdef CONFIG_64BIT > > #define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) > > @@ -103,6 +105,17 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, > > #define KSTK_EIP(tsk) (ulong)(task_pt_regs(tsk)->epc) > > #define KSTK_ESP(tsk) (ulong)(task_pt_regs(tsk)->sp) > > > > +#define ARCH_HAS_PREFETCHW > > +#define PREFETCHW_ASM(base) ALTERNATIVE(__nops(1), \ > > + CBO_prefetchw(base), \ > > + 0, \ > > + RISCV_ISA_EXT_ZICBOP, \ > > + CONFIG_RISCV_ISA_ZICBOP) > > +static inline void prefetchw(const void *ptr) > > +{ > > + asm volatile(PREFETCHW_ASM(%0) > > + : : "r" (ptr) : "memory"); > > +} > > > > /* Do necessary setup to start up a newly executed thread. */ > > extern void start_thread(struct pt_regs *regs, > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c > > index ef7b4fd9e876..e0b897db0b97 100644 > > --- a/arch/riscv/kernel/cpufeature.c > > +++ b/arch/riscv/kernel/cpufeature.c > > @@ -159,6 +159,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { > > __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), > > __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), > > __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), > > + __RISCV_ISA_EXT_DATA(zicbop, RISCV_ISA_EXT_ZICBOP), > > zicbop should be above zicboz (extensions alphabetical within their > category). > > > __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), > > __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), > > __RISCV_ISA_EXT_DATA(zifencei, RISCV_ISA_EXT_ZIFENCEI), > > -- > > 2.36.1 > > > > Thanks, > drew >
On Fri, Sep 15, 2023 at 05:22:26AM -0300, Leonardo Bras wrote: > On Thu, Sep 14, 2023 at 03:47:59PM +0200, Andrew Jones wrote: > > On Sun, Sep 10, 2023 at 04:28:57AM -0400, guoren@kernel.org wrote: > > > From: Guo Ren <guoren@linux.alibaba.com> ... > > > diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h > > > index 6960beb75f32..dc590d331894 100644 > > > --- a/arch/riscv/include/asm/insn-def.h > > > +++ b/arch/riscv/include/asm/insn-def.h > > > @@ -134,6 +134,7 @@ > > > > > > #define RV_OPCODE_MISC_MEM RV_OPCODE(15) > > > #define RV_OPCODE_SYSTEM RV_OPCODE(115) > > > +#define RV_OPCODE_PREFETCH RV_OPCODE(19) > > > > This should be named RV_OPCODE_OP_IMM and be placed in > > numerical order with the others, i.e. above SYSTEM. > > > > > > > > #define HFENCE_VVMA(vaddr, asid) \ > > > INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(17), \ > > > @@ -196,4 +197,8 @@ > > > INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ > > > RS1(base), SIMM12(4)) > > > > > > +#define CBO_prefetchw(base) \ > > > > Please name this 'PREFETCH_w' and it should take an immediate parameter, > > even if we intend to pass 0 for it. > > It makes sense. > > The mnemonic in the previously mentioned documentation is: > > prefetch.w offset(base) > > So yeah, makes sense to have both offset and base as parameters for > CBO_prefetchw (or PREFETCH_w, I have no strong preference). I have a strong preference :-) PREFETCH_w is consistent with the naming we already have for e.g. cbo.clean, which is CBO_clean. The instruction we're picking a name for now is prefetch.w, not cbo.prefetchw. > > > > > > + INSN_R(OPCODE_PREFETCH, FUNC3(6), FUNC7(0), \ > > > + RD(x0), RS1(base), RS2(x0)) > > > > prefetch.w is not an R-type instruction, it's an S-type. While the bit > > shifts are the same, the names are different. We need to add S-type > > names while defining this instruction. > > That is correct, it is supposed to look like a store instruction (S-type), > even though documentation don't explicitly state that. > > Even though it works fine with the R-type definition, code documentation > would be wrong, and future changes could break it. > > > Then, this define would be > > > > #define PREFETCH_w(base, imm) \ I should have suggested 'offset' instead of 'imm' for the second parameter name. > > INSN_S(OPCODE_OP_IMM, FUNC3(6), IMM_11_5(imm), __IMM_4_0(0), \ > > RS1(base), __RS2(3)) > > s/OPCODE_OP_IMM/OPCODE_PREFETCH > 0x4 vs 0x13 There's no major opcode named "PREFETCH" and the spec says that the major opcode used for prefetch instructions is OP-IMM. That's why we want to name this OPCODE_OP_IMM. I'm not sure where the 0x4 you're referring to comes from. A 32-bit instruction has the lowest two bits set (figure 1.1 of the unpriv spec) and table 27.1 of the unpriv spec shows OP-IMM is 0b00100xx, so we have 0b0010011. Keeping the naming of the opcode macros consistent with the spec also keeps them consistent with the .insn directive where we could even use the names directly, i.e. .insn s OP_IMM, 6, x3, 0(a0) > > > > When the assembler as insn_r I hope it will validate that I meant insn_s here, which would be the macro for '.insn s' > > (imm & 0xfe0) == imm I played with it. It won't do what we want for prefetch, only what works for s-type instructions in general, i.e. it allows +/-2047 offsets and fails for everything else. That's good enough. 
We can just mask off the low 5 bits here in our macro #define PREFETCH_w(base, offset) \ INSN_S(OPCODE_OP_IMM, FUNC3(6), IMM_11_5((offset) & ~0x1f), \ __IMM_4_0(0), RS1(base), __RS2(3)) Thanks, drew
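Putting this subthread together, a sketch of how processor.h could consume the suggested macro (names as proposed in the thread, not necessarily what was merged):

	/* Hypothetical use of the PREFETCH_w macro sketched above. */
	#define ARCH_HAS_PREFETCHW
	#define PREFETCHW_ASM(base)	ALTERNATIVE(__nops(1),			\
						    PREFETCH_w(base, 0),	\
						    0,				\
						    RISCV_ISA_EXT_ZICBOP,	\
						    CONFIG_RISCV_ISA_ZICBOP)

	static inline void prefetchw(const void *ptr)
	{
		asm volatile(PREFETCHW_ASM(%0) : : "r" (ptr) : "memory");
	}

Since only imm[11:5] is encoded, the (offset) & ~0x1f mask silently rounds any offset down to a 32-byte boundary; for example, an offset of 100 would be emitted as 96.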
On Fri, Sep 15, 2023 at 01:07:40PM +0200, Andrew Jones wrote: > On Fri, Sep 15, 2023 at 05:22:26AM -0300, Leonardo Bras wrote: > > On Thu, Sep 14, 2023 at 03:47:59PM +0200, Andrew Jones wrote: > > > On Sun, Sep 10, 2023 at 04:28:57AM -0400, guoren@kernel.org wrote: > > > > From: Guo Ren <guoren@linux.alibaba.com> > ... > > > > diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h > > > > index 6960beb75f32..dc590d331894 100644 > > > > --- a/arch/riscv/include/asm/insn-def.h > > > > +++ b/arch/riscv/include/asm/insn-def.h > > > > @@ -134,6 +134,7 @@ > > > > > > > > #define RV_OPCODE_MISC_MEM RV_OPCODE(15) > > > > #define RV_OPCODE_SYSTEM RV_OPCODE(115) > > > > +#define RV_OPCODE_PREFETCH RV_OPCODE(19) > > > > > > This should be named RV_OPCODE_OP_IMM and be placed in > > > numerical order with the others, i.e. above SYSTEM. > > > > > > > > > > > #define HFENCE_VVMA(vaddr, asid) \ > > > > INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(17), \ > > > > @@ -196,4 +197,8 @@ > > > > INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ > > > > RS1(base), SIMM12(4)) > > > > > > > > +#define CBO_prefetchw(base) \ > > > > > > Please name this 'PREFETCH_w' and it should take an immediate parameter, > > > even if we intend to pass 0 for it. > > > > It makes sense. > > > > The mnemonic in the previously mentioned documentation is: > > > > prefetch.w offset(base) > > > > So yeah, makes sense to have both offset and base as parameters for > > CBO_prefetchw (or PREFETCH_w, I have no strong preference). > > I have a strong preference :-) > > PREFETCH_w is consistent with the naming we already have for e.g. > cbo.clean, which is CBO_clean. The instruction we're picking a name > for now is prefetch.w, not cbo.prefetchw. btw, the CBO_foo stuff was named that way as we were using them in alternatives originally as an argument, that manifested as: "cbo." __stringify(_op) " (a0)\n\t" That was later changed to CBO_##_op(a0) but the then un-needed (AFAICT) capitalisation was kept to avoid touching the callsites of the alternative. Maybe you remember better than I do drew, since the idea was yours & I forgot I even wrote that pattch. If this isn't being used in a similar manner, then the w has no reason to be in the odd lowercase form. Cheers, Conor. > > > > > > > > > > + INSN_R(OPCODE_PREFETCH, FUNC3(6), FUNC7(0), \ > > > > + RD(x0), RS1(base), RS2(x0)) > > > > > > prefetch.w is not an R-type instruction, it's an S-type. While the bit > > > shifts are the same, the names are different. We need to add S-type > > > names while defining this instruction. > > > > That is correct, it is supposed to look like a store instruction (S-type), > > even though documentation don't explicitly state that. > > > > Even though it works fine with the R-type definition, code documentation > > would be wrong, and future changes could break it. > > > > > Then, this define would be > > > > > > #define PREFETCH_w(base, imm) \ > > I should have suggested 'offset' instead of 'imm' for the second parameter > name. > > > > INSN_S(OPCODE_OP_IMM, FUNC3(6), IMM_11_5(imm), __IMM_4_0(0), \ > > > RS1(base), __RS2(3)) > > > > s/OPCODE_OP_IMM/OPCODE_PREFETCH > > 0x4 vs 0x13 > > There's no major opcode named "PREFETCH" and the spec says that the major > opcode used for prefetch instructions is OP-IMM. That's why we want to > name this OPCODE_OP_IMM. I'm not sure where the 0x4 you're referring to > comes from. 
A 32-bit instruction has the lowest two bits set (figure 1.1 > of the unpriv spec) and table 27.1 of the unpriv spec shows OP-IMM is > 0b00100xx, so we have 0b0010011. Keeping the naming of the opcode macros > consistent with the spec also keeps them consistent with the .insn > directive where we could even use the names directly, i.e. > > .insn s OP_IMM, 6, x3, 0(a0) > > > > > > > When the assembler as insn_r I hope it will validate that > > I meant insn_s here, which would be the macro for '.insn s' > > > > (imm & 0xfe0) == imm > > I played with it. It won't do what we want for prefetch, only > what works for s-type instructions in general, i.e. it allows > +/-2047 offsets and fails for everything else. That's good enough. > We can just mask off the low 5 bits here in our macro > > #define PREFETCH_w(base, offset) \ > INSN_S(OPCODE_OP_IMM, FUNC3(6), IMM_11_5((offset) & ~0x1f), \ > __IMM_4_0(0), RS1(base), __RS2(3)) > > Thanks, > drew
Yo, On Thu, Sep 14, 2023 at 04:47:18PM +0200, Andrew Jones wrote: > On Thu, Sep 14, 2023 at 04:25:53PM +0200, Andrew Jones wrote: > > On Sun, Sep 10, 2023 at 04:28:57AM -0400, guoren@kernel.org wrote: > > > From: Guo Ren <guoren@linux.alibaba.com> > > > > > > Cache-block prefetch instructions are HINTs to the hardware to > > > indicate that software intends to perform a particular type of > > > memory access in the near future. Enable ARCH_HAS_PREFETCHW and > > > improve the arch_xchg for qspinlock xchg_tail. > > > > > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > > > Signed-off-by: Guo Ren <guoren@kernel.org> > > > --- > > > arch/riscv/Kconfig | 15 +++++++++++++++ > > > arch/riscv/include/asm/cmpxchg.h | 4 +++- > > > arch/riscv/include/asm/hwcap.h | 1 + > > > arch/riscv/include/asm/insn-def.h | 5 +++++ > > > arch/riscv/include/asm/processor.h | 13 +++++++++++++ > > > arch/riscv/kernel/cpufeature.c | 1 + > > > 6 files changed, 38 insertions(+), 1 deletion(-) > > > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > > index e9ae6fa232c3..2c346fe169c1 100644 > > > --- a/arch/riscv/Kconfig > > > +++ b/arch/riscv/Kconfig > > > @@ -617,6 +617,21 @@ config RISCV_ISA_ZICBOZ > > > > > > If you don't know what to do here, say Y. > > > > > > +config RISCV_ISA_ZICBOP > > > > Even if we're not concerned with looping over blocks yet, I think we > > should introduce zicbop block size DT parsing at the same time we bring > > zicbop support to the kernel (it's just more copy+paste from zicbom and > > zicboz). It's a bit annoying that the CMO spec doesn't state that block > > sizes should be the same for m/z/p. And, the fact that m/z/p are all > > separate extensions leads us to needing to parse block sizes for all > > three, despite the fact that in practice they'll probably be the same. > > Although, I saw on a different mailing list that Andrei Warkentin > interpreted section 2.7 "Software Discovery" of the spec, which states > > """ > The initial set of CMO extensions requires the following information to be > discovered by software: > > * The size of the cache block for management and prefetch instructions > * The size of the cache block for zero instructions > * CBIE support at each privilege level > > Other general cache characteristics may also be specified in the discovery > mechanism. > """ > > as management and prefetch having the same block size and only zero > potentially having a different size. That looks like a reasonable > interpretation to me, too. TBH, I don't really care what ambiguous wording the spec has used, we have the opportunity to make better decisions if we please. I hate the fact that the specs are often not abundantly clear about things like this. > So, we could maybe proceed with assuming we > can use zicbom_block_size for prefetch, for now. If a platform comes along > that interpreted the spec differently, requiring prefetch block size to > be specified separately, then we'll cross that bridge when we get there. That said, I think I suggested originally having the zicboz stuff default to the zicbom size too, so I'd be happy with prefetch stuff working exclusively that way until someone comes along looking for different sizes. The binding should be updated though since riscv,cbom-block-size: $ref: /schemas/types.yaml#/definitions/uint32 description: The blocksize in bytes for the Zicbom cache operations. would no longer be a complete description. 
While thinking about new wording though, it feels really clunky to describe it like: The block size in bytes for the Zicbom cache operations, Zicbop cache operations will default to this block size where not explicitly defined. since there's then no way to actually define the block size if it is different. Unless you've got some magic wording, I'd rather document riscv,cbop-block-size, even if we are going to use riscv,cbom-block-size as the default. Cheers, Conor.
On Fri, Sep 15, 2023 at 12:37:50PM +0100, Conor Dooley wrote: > Yo, > > On Thu, Sep 14, 2023 at 04:47:18PM +0200, Andrew Jones wrote: > > On Thu, Sep 14, 2023 at 04:25:53PM +0200, Andrew Jones wrote: > > > On Sun, Sep 10, 2023 at 04:28:57AM -0400, guoren@kernel.org wrote: > > > > From: Guo Ren <guoren@linux.alibaba.com> > > > > > > > > Cache-block prefetch instructions are HINTs to the hardware to > > > > indicate that software intends to perform a particular type of > > > > memory access in the near future. Enable ARCH_HAS_PREFETCHW and > > > > improve the arch_xchg for qspinlock xchg_tail. > > > > > > > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > > > > Signed-off-by: Guo Ren <guoren@kernel.org> > > > > --- > > > > arch/riscv/Kconfig | 15 +++++++++++++++ > > > > arch/riscv/include/asm/cmpxchg.h | 4 +++- > > > > arch/riscv/include/asm/hwcap.h | 1 + > > > > arch/riscv/include/asm/insn-def.h | 5 +++++ > > > > arch/riscv/include/asm/processor.h | 13 +++++++++++++ > > > > arch/riscv/kernel/cpufeature.c | 1 + > > > > 6 files changed, 38 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > > > index e9ae6fa232c3..2c346fe169c1 100644 > > > > --- a/arch/riscv/Kconfig > > > > +++ b/arch/riscv/Kconfig > > > > @@ -617,6 +617,21 @@ config RISCV_ISA_ZICBOZ > > > > > > > > If you don't know what to do here, say Y. > > > > > > > > +config RISCV_ISA_ZICBOP > > > > > > Even if we're not concerned with looping over blocks yet, I think we > > > should introduce zicbop block size DT parsing at the same time we bring > > > zicbop support to the kernel (it's just more copy+paste from zicbom and > > > zicboz). It's a bit annoying that the CMO spec doesn't state that block > > > sizes should be the same for m/z/p. And, the fact that m/z/p are all > > > separate extensions leads us to needing to parse block sizes for all > > > three, despite the fact that in practice they'll probably be the same. > > > > Although, I saw on a different mailing list that Andrei Warkentin > > interpreted section 2.7 "Software Discovery" of the spec, which states > > > > """ > > The initial set of CMO extensions requires the following information to be > > discovered by software: > > > > * The size of the cache block for management and prefetch instructions > > * The size of the cache block for zero instructions > > * CBIE support at each privilege level > > > > Other general cache characteristics may also be specified in the discovery > > mechanism. > > """ > > > > as management and prefetch having the same block size and only zero > > potentially having a different size. That looks like a reasonable > > interpretation to me, too. > > TBH, I don't really care what ambiguous wording the spec has used, we > have the opportunity to make better decisions if we please. I hate the > fact that the specs are often not abundantly clear about things like this. > > > So, we could maybe proceed with assuming we > > can use zicbom_block_size for prefetch, for now. If a platform comes along > > that interpreted the spec differently, requiring prefetch block size to > > be specified separately, then we'll cross that bridge when we get there. > > That said, I think I suggested originally having the zicboz stuff default > to the zicbom size too, so I'd be happy with prefetch stuff working > exclusively that way until someone comes along looking for different sizes. 
> The binding should be updated though since > > riscv,cbom-block-size: > $ref: /schemas/types.yaml#/definitions/uint32 > description: > The blocksize in bytes for the Zicbom cache operations. > > would no longer be a complete description. > > While thinking about new wording though, it feels really clunky to describe > it like: > The block size in bytes for the Zicbom cache operations, Zicbop > cache operations will default to this block size where not > explicitly defined. > > since there's then no way to actually define the block size if it is > different. Unless you've got some magic wording, I'd rather document > riscv,cbop-block-size, even if we are going to use riscv,cbom-block-size > as the default. > Sounds good to me, but if it's documented, then we should probably implement its parsing. Then, at that point, I wonder if it makes sense to have the fallback at all, or if it's not better just to require all the DTs to be explicit (even if redundant). Thanks, drew
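A sketch of the parsing this would imply, modelled on the existing zicbom/zicboz handling (the function and variable names here are placeholders, and the fallback is the behaviour discussed above, not necessarily what was merged):

	/* Placeholder sketch of riscv,cbop-block-size parsing. */
	#include <linux/of.h>
	#include <linux/types.h>

	static u32 riscv_cbop_block_size;

	static void __init riscv_parse_cbop_block_size(struct device_node *node)
	{
		u32 val;

		if (!of_property_read_u32(node, "riscv,cbop-block-size", &val))
			riscv_cbop_block_size = val;
		else
			/* fall back to the Zicbom size when the property is absent */
			riscv_cbop_block_size = riscv_cbom_block_size;
	}

Requiring the property outright, as suggested above, would simply drop the else branch and warn when the property is missing.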
On Fri, Sep 15, 2023 at 12:26:20PM +0100, Conor Dooley wrote: > On Fri, Sep 15, 2023 at 01:07:40PM +0200, Andrew Jones wrote: > > On Fri, Sep 15, 2023 at 05:22:26AM -0300, Leonardo Bras wrote: > > > On Thu, Sep 14, 2023 at 03:47:59PM +0200, Andrew Jones wrote: > > > > On Sun, Sep 10, 2023 at 04:28:57AM -0400, guoren@kernel.org wrote: > > > > > From: Guo Ren <guoren@linux.alibaba.com> > > ... > > > > > diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h > > > > > index 6960beb75f32..dc590d331894 100644 > > > > > --- a/arch/riscv/include/asm/insn-def.h > > > > > +++ b/arch/riscv/include/asm/insn-def.h > > > > > @@ -134,6 +134,7 @@ > > > > > > > > > > #define RV_OPCODE_MISC_MEM RV_OPCODE(15) > > > > > #define RV_OPCODE_SYSTEM RV_OPCODE(115) > > > > > +#define RV_OPCODE_PREFETCH RV_OPCODE(19) > > > > > > > > This should be named RV_OPCODE_OP_IMM and be placed in > > > > numerical order with the others, i.e. above SYSTEM. > > > > > > > > > > > > > > #define HFENCE_VVMA(vaddr, asid) \ > > > > > INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(17), \ > > > > > @@ -196,4 +197,8 @@ > > > > > INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ > > > > > RS1(base), SIMM12(4)) > > > > > > > > > > +#define CBO_prefetchw(base) \ > > > > > > > > Please name this 'PREFETCH_w' and it should take an immediate parameter, > > > > even if we intend to pass 0 for it. > > > > > > It makes sense. > > > > > > The mnemonic in the previously mentioned documentation is: > > > > > > prefetch.w offset(base) > > > > > > So yeah, makes sense to have both offset and base as parameters for > > > CBO_prefetchw (or PREFETCH_w, I have no strong preference). > > > > I have a strong preference :-) > > > > PREFETCH_w is consistent with the naming we already have for e.g. > > cbo.clean, which is CBO_clean. The instruction we're picking a name > > for now is prefetch.w, not cbo.prefetchw. > > btw, the CBO_foo stuff was named that way as we were using them in > alternatives originally as an argument, that manifested as: > "cbo." __stringify(_op) " (a0)\n\t" > That was later changed to > CBO_##_op(a0) > but the then un-needed (AFAICT) capitalisation was kept to avoid > touching the callsites of the alternative. Maybe you remember better > than I do drew, since the idea was yours & I forgot I even wrote that > pattch. And I forgot anything I may have suggested about it :-) > If this isn't being used in a similar manner, then the w has no reason > to be in the odd lowercase form. Other than to be consistent... However, the CBO_* instructions are not consistent with the rest of macros. If we don't need lowercase for any reason, then my preference would be to bite the bullet and change all the callsites of CBO_* macros and then introduce this new instruction as PREFETCH_W Thanks, drew
On Wed, Sep 13, 2023 at 4:50 PM Leonardo Bras <leobras@redhat.com> wrote: > > On Sun, Sep 10, 2023 at 04:28:57AM -0400, guoren@kernel.org wrote: > > From: Guo Ren <guoren@linux.alibaba.com> > > > > Cache-block prefetch instructions are HINTs to the hardware to > > indicate that software intends to perform a particular type of > > memory access in the near future. Enable ARCH_HAS_PREFETCHW and > > improve the arch_xchg for qspinlock xchg_tail. > > > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > > Signed-off-by: Guo Ren <guoren@kernel.org> > > --- > > arch/riscv/Kconfig | 15 +++++++++++++++ > > arch/riscv/include/asm/cmpxchg.h | 4 +++- > > arch/riscv/include/asm/hwcap.h | 1 + > > arch/riscv/include/asm/insn-def.h | 5 +++++ > > arch/riscv/include/asm/processor.h | 13 +++++++++++++ > > arch/riscv/kernel/cpufeature.c | 1 + > > 6 files changed, 38 insertions(+), 1 deletion(-) > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > index e9ae6fa232c3..2c346fe169c1 100644 > > --- a/arch/riscv/Kconfig > > +++ b/arch/riscv/Kconfig > > @@ -617,6 +617,21 @@ config RISCV_ISA_ZICBOZ > > > > If you don't know what to do here, say Y. > > > > +config RISCV_ISA_ZICBOP > > + bool "Zicbop extension support for cache block prefetch" > > + depends on MMU > > + depends on RISCV_ALTERNATIVE > > + default y > > + help > > + Adds support to dynamically detect the presence of the ZICBOP > > + extension (Cache Block Prefetch Operations) and enable its > > + usage. > > + > > + The Zicbop extension can be used to prefetch cache block for > > + read/write/instruction fetch. > > + > > + If you don't know what to do here, say Y. > > + > > config TOOLCHAIN_HAS_ZIHINTPAUSE > > bool > > default y > > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > > index 702725727671..56eff7a9d2d2 100644 > > --- a/arch/riscv/include/asm/cmpxchg.h > > +++ b/arch/riscv/include/asm/cmpxchg.h > > @@ -11,6 +11,7 @@ > > > > #include <asm/barrier.h> > > #include <asm/fence.h> > > +#include <asm/processor.h> > > > > #define __arch_xchg_masked(prepend, append, r, p, n) \ > > ({ \ > > @@ -25,6 +26,7 @@ > > \ > > __asm__ __volatile__ ( \ > > prepend \ > > + PREFETCHW_ASM(%5) \ > > "0: lr.w %0, %2\n" \ > > " and %1, %0, %z4\n" \ > > " or %1, %1, %z3\n" \ > > @@ -32,7 +34,7 @@ > > " bnez %1, 0b\n" \ > > append \ > > : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ > > - : "rJ" (__newx), "rJ" (~__mask) \ > > + : "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \ > > : "memory"); \ > > \ > > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ > > diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h > > index b7b58258f6c7..78b7b8b53778 100644 > > --- a/arch/riscv/include/asm/hwcap.h > > +++ b/arch/riscv/include/asm/hwcap.h > > @@ -58,6 +58,7 @@ > > #define RISCV_ISA_EXT_ZICSR 40 > > #define RISCV_ISA_EXT_ZIFENCEI 41 > > #define RISCV_ISA_EXT_ZIHPM 42 > > +#define RISCV_ISA_EXT_ZICBOP 43 > > > > #define RISCV_ISA_EXT_MAX 64 > > > > diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h > > index 6960beb75f32..dc590d331894 100644 > > --- a/arch/riscv/include/asm/insn-def.h > > +++ b/arch/riscv/include/asm/insn-def.h > > @@ -134,6 +134,7 @@ > > > > #define RV_OPCODE_MISC_MEM RV_OPCODE(15) > > #define RV_OPCODE_SYSTEM RV_OPCODE(115) > > +#define RV_OPCODE_PREFETCH RV_OPCODE(19) > > > > #define HFENCE_VVMA(vaddr, asid) \ > > INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(17), \ > > @@ -196,4 +197,8 @@ > > INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ > > RS1(base), 
SIMM12(4)) > > > > +#define CBO_prefetchw(base) \ > > + INSN_R(OPCODE_PREFETCH, FUNC3(6), FUNC7(0), \ > > + RD(x0), RS1(base), RS2(x0)) > > + > > I understand that here you create the instruction via bitfield, following > the ISA, and this enables using instructions not available on the > toolchain. > > It took me some time to find the document with this instruction, so please > add this to the commit msg: > > https://github.com/riscv/riscv-CMOs/blob/master/specifications/cmobase-v1.0.pdf > Page 23. > > IIUC, the instruction is "prefetch.w". > > Maybe I am missing something, but in the document the rs2 field > (PREFETCH.W) contains a 0x3, while the above looks to have a 0 instead. > > rs2 field = 0x0 would be a prefetch.i (instruction prefetch) instead. > > Is the above correct, or am I missing something? Oh, you are right. My fault, thx for pointing out. It should be: + INSN_R(OPCODE_PREFETCH, FUNC3(6), FUNC7(0), \ + RD(x0), RS1(base), RS2(x3)) > > > Thanks! > Leo > > > #endif /* __ASM_INSN_DEF_H */ > > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h > > index de9da852f78d..7ad3a24212e8 100644 > > --- a/arch/riscv/include/asm/processor.h > > +++ b/arch/riscv/include/asm/processor.h > > @@ -12,6 +12,8 @@ > > #include <vdso/processor.h> > > > > #include <asm/ptrace.h> > > +#include <asm/insn-def.h> > > +#include <asm/hwcap.h> > > > > #ifdef CONFIG_64BIT > > #define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) > > @@ -103,6 +105,17 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, > > #define KSTK_EIP(tsk) (ulong)(task_pt_regs(tsk)->epc) > > #define KSTK_ESP(tsk) (ulong)(task_pt_regs(tsk)->sp) > > > > +#define ARCH_HAS_PREFETCHW > > +#define PREFETCHW_ASM(base) ALTERNATIVE(__nops(1), \ > > + CBO_prefetchw(base), \ > > + 0, \ > > + RISCV_ISA_EXT_ZICBOP, \ > > + CONFIG_RISCV_ISA_ZICBOP) > > +static inline void prefetchw(const void *ptr) > > +{ > > + asm volatile(PREFETCHW_ASM(%0) > > + : : "r" (ptr) : "memory"); > > +} > > > > /* Do necessary setup to start up a newly executed thread. */ > > extern void start_thread(struct pt_regs *regs, > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c > > index ef7b4fd9e876..e0b897db0b97 100644 > > --- a/arch/riscv/kernel/cpufeature.c > > +++ b/arch/riscv/kernel/cpufeature.c > > @@ -159,6 +159,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { > > __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), > > __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), > > __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), > > + __RISCV_ISA_EXT_DATA(zicbop, RISCV_ISA_EXT_ZICBOP), > > __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), > > __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), > > __RISCV_ISA_EXT_DATA(zifencei, RISCV_ISA_EXT_ZIFENCEI), > > -- > > 2.36.1 > > >
> > If this isn't being used in a similar manner, then the w has no reason > > to be in the odd lowercase form. > > Other than to be consistent... However, the CBO_* instructions are not > consistent with the rest of macros. If we don't need lowercase for any > reason, then my preference would be to bite the bullet and change all the > callsites of CBO_* macros and then introduce this new instruction as > PREFETCH_W Aye, I probably should've done it to begin with. Maybe there was some other consideration at the time.
On Fri, Sep 15, 2023 at 02:14:40PM +0200, Andrew Jones wrote: > On Fri, Sep 15, 2023 at 12:37:50PM +0100, Conor Dooley wrote: > > On Thu, Sep 14, 2023 at 04:47:18PM +0200, Andrew Jones wrote: > > > On Thu, Sep 14, 2023 at 04:25:53PM +0200, Andrew Jones wrote: > > > > On Sun, Sep 10, 2023 at 04:28:57AM -0400, guoren@kernel.org wrote: > > > > > From: Guo Ren <guoren@linux.alibaba.com> > > > So, we could maybe proceed with assuming we > > > can use zicbom_block_size for prefetch, for now. If a platform comes along > > > that interpreted the spec differently, requiring prefetch block size to > > > be specified separately, then we'll cross that bridge when we get there. > > > > That said, I think I suggested originally having the zicboz stuff default > > to the zicbom size too, so I'd be happy with prefetch stuff working > > exclusively that way until someone comes along looking for different sizes. > > The binding should be updated though since > > > > riscv,cbom-block-size: > > $ref: /schemas/types.yaml#/definitions/uint32 > > description: > > The blocksize in bytes for the Zicbom cache operations. > > > > would no longer be a complete description. > > > > While thinking about new wording though, it feels really clunky to describe > > it like: > > The block size in bytes for the Zicbom cache operations, Zicbop > > cache operations will default to this block size where not > > explicitly defined. > > > > since there's then no way to actually define the block size if it is > > different. Unless you've got some magic wording, I'd rather document > > riscv,cbop-block-size, even if we are going to use riscv,cbom-block-size > > as the default. > > > > Sounds good to me, but if it's documented, then we should probably > implement its parsing. Then, at that point, I wonder if it makes sense to > have the fallback at all, or if it's not better just to require all the > DTs to be explicit (even if redundant). Sure, why not I guess.
On Fri, Sep 15, 2023 at 01:07:40PM +0200, Andrew Jones wrote: > On Fri, Sep 15, 2023 at 05:22:26AM -0300, Leonardo Bras wrote: > > On Thu, Sep 14, 2023 at 03:47:59PM +0200, Andrew Jones wrote: > > > On Sun, Sep 10, 2023 at 04:28:57AM -0400, guoren@kernel.org wrote: > > > > From: Guo Ren <guoren@linux.alibaba.com> > ... > > > > diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h > > > > index 6960beb75f32..dc590d331894 100644 > > > > --- a/arch/riscv/include/asm/insn-def.h > > > > +++ b/arch/riscv/include/asm/insn-def.h > > > > @@ -134,6 +134,7 @@ > > > > > > > > #define RV_OPCODE_MISC_MEM RV_OPCODE(15) > > > > #define RV_OPCODE_SYSTEM RV_OPCODE(115) > > > > +#define RV_OPCODE_PREFETCH RV_OPCODE(19) > > > > > > This should be named RV_OPCODE_OP_IMM and be placed in > > > numerical order with the others, i.e. above SYSTEM. > > > > > > > > > > > #define HFENCE_VVMA(vaddr, asid) \ > > > > INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(17), \ > > > > @@ -196,4 +197,8 @@ > > > > INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ > > > > RS1(base), SIMM12(4)) > > > > > > > > +#define CBO_prefetchw(base) \ > > > > > > Please name this 'PREFETCH_w' and it should take an immediate parameter, > > > even if we intend to pass 0 for it. > > > > It makes sense. > > > > The mnemonic in the previously mentioned documentation is: > > > > prefetch.w offset(base) > > > > So yeah, makes sense to have both offset and base as parameters for > > CBO_prefetchw (or PREFETCH_w, I have no strong preference). > > I have a strong preference :-) > > PREFETCH_w is consistent with the naming we already have for e.g. > cbo.clean, which is CBO_clean. The instruction we're picking a name > for now is prefetch.w, not cbo.prefetchw. > > > > > > > > > > + INSN_R(OPCODE_PREFETCH, FUNC3(6), FUNC7(0), \ > > > > + RD(x0), RS1(base), RS2(x0)) > > > > > > prefetch.w is not an R-type instruction, it's an S-type. While the bit > > > shifts are the same, the names are different. We need to add S-type > > > names while defining this instruction. > > > > That is correct, it is supposed to look like a store instruction (S-type), > > even though documentation don't explicitly state that. > > > > Even though it works fine with the R-type definition, code documentation > > would be wrong, and future changes could break it. > > > > > Then, this define would be > > > > > > #define PREFETCH_w(base, imm) \ > > I should have suggested 'offset' instead of 'imm' for the second parameter > name. > > > > INSN_S(OPCODE_OP_IMM, FUNC3(6), IMM_11_5(imm), __IMM_4_0(0), \ > > > RS1(base), __RS2(3)) > > > > s/OPCODE_OP_IMM/OPCODE_PREFETCH > > 0x4 vs 0x13 > > There's no major opcode named "PREFETCH" and the spec says that the major > opcode used for prefetch instructions is OP-IMM. That's why we want to > name this OPCODE_OP_IMM. I'm not sure where the 0x4 you're referring to > comes from Oh, you are right. Sorry about this, I misinterpreted table 24.1 from the Unprivileged ISA (20191213). Yeap, everything make sense now, and the define below is not actually needed: > > > > +#define RV_OPCODE_PREFETCH RV_OPCODE(19) Thanks! Leo > . A 32-bit instruction has the lowest two bits set (figure 1.1 > of the unpriv spec) and table 27.1 of the unpriv spec shows OP-IMM is > 0b00100xx, so we have 0b0010011. Keeping the naming of the opcode macros > consistent with the spec also keeps them consistent with the .insn > directive where we could even use the names directly, i.e. 
> > .insn s OP_IMM, 6, x3, 0(a0) > > > > > > > When the assembler as insn_r I hope it will validate that > > I meant insn_s here, which would be the macro for '.insn s' > > > > (imm & 0xfe0) == imm > > I played with it. It won't do what we want for prefetch, only > what works for s-type instructions in general, i.e. it allows > +/-2047 offsets and fails for everything else. That's good enough. > We can just mask off the low 5 bits here in our macro > > #define PREFETCH_w(base, offset) \ > INSN_S(OPCODE_OP_IMM, FUNC3(6), IMM_11_5((offset) & ~0x1f), \ > __IMM_4_0(0), RS1(base), __RS2(3)) > > Thanks, > drew >
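For completeness, the equivalent of drew's ".insn s OP_IMM, 6, x3, 0(a0)" example in inline asm, which a sufficiently new binutils can assemble directly (sketch only; the series keeps the insn-def.h encoding so older toolchains still build):

	/* Illustrative alternative relying on the assembler's .insn directive. */
	static inline void prefetchw_insn_directive(const void *ptr)
	{
		asm volatile(".insn s OP_IMM, 6, x3, 0(%0)" : : "r" (ptr) : "memory");
	}

Either route should produce the same word, 0x00356013 for prefetch.w 0(a0).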
On Fri, Sep 15, 2023 at 01:42:56PM +0100, Conor Dooley wrote: > > > > If this isn't being used in a similar manner, then the w has no reason > > > to be in the odd lowercase form. > > > > Other than to be consistent... However, the CBO_* instructions are not > > consistent with the rest of macros. If we don't need lowercase for any > > reason, then my preference would be to bite the bullet and change all the > > callsites of CBO_* macros and then introduce this new instruction as > > PREFETCH_W > > Aye, I probably should've done it to begin with. Maybe there was some > other consideration at the time. FWIW, I sent a patch for this earlier today. I figure you saw it Drew, but nonetheless: https://lore.kernel.org/all/20230915-aloe-dollar-994937477776@spud/
On Fri, Sep 15, 2023 at 08:36:31PM +0800, Guo Ren wrote: > On Wed, Sep 13, 2023 at 4:50 PM Leonardo Bras <leobras@redhat.com> wrote: > > > > On Sun, Sep 10, 2023 at 04:28:57AM -0400, guoren@kernel.org wrote: > > > From: Guo Ren <guoren@linux.alibaba.com> > > > > > > Cache-block prefetch instructions are HINTs to the hardware to > > > indicate that software intends to perform a particular type of > > > memory access in the near future. Enable ARCH_HAS_PREFETCHW and > > > improve the arch_xchg for qspinlock xchg_tail. > > > > > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > > > Signed-off-by: Guo Ren <guoren@kernel.org> > > > --- > > > arch/riscv/Kconfig | 15 +++++++++++++++ > > > arch/riscv/include/asm/cmpxchg.h | 4 +++- > > > arch/riscv/include/asm/hwcap.h | 1 + > > > arch/riscv/include/asm/insn-def.h | 5 +++++ > > > arch/riscv/include/asm/processor.h | 13 +++++++++++++ > > > arch/riscv/kernel/cpufeature.c | 1 + > > > 6 files changed, 38 insertions(+), 1 deletion(-) > > > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > > index e9ae6fa232c3..2c346fe169c1 100644 > > > --- a/arch/riscv/Kconfig > > > +++ b/arch/riscv/Kconfig > > > @@ -617,6 +617,21 @@ config RISCV_ISA_ZICBOZ > > > > > > If you don't know what to do here, say Y. > > > > > > +config RISCV_ISA_ZICBOP > > > + bool "Zicbop extension support for cache block prefetch" > > > + depends on MMU > > > + depends on RISCV_ALTERNATIVE > > > + default y > > > + help > > > + Adds support to dynamically detect the presence of the ZICBOP > > > + extension (Cache Block Prefetch Operations) and enable its > > > + usage. > > > + > > > + The Zicbop extension can be used to prefetch cache block for > > > + read/write/instruction fetch. > > > + > > > + If you don't know what to do here, say Y. 
> > > + > > > config TOOLCHAIN_HAS_ZIHINTPAUSE > > > bool > > > default y > > > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > > > index 702725727671..56eff7a9d2d2 100644 > > > --- a/arch/riscv/include/asm/cmpxchg.h > > > +++ b/arch/riscv/include/asm/cmpxchg.h > > > @@ -11,6 +11,7 @@ > > > > > > #include <asm/barrier.h> > > > #include <asm/fence.h> > > > +#include <asm/processor.h> > > > > > > #define __arch_xchg_masked(prepend, append, r, p, n) \ > > > ({ \ > > > @@ -25,6 +26,7 @@ > > > \ > > > __asm__ __volatile__ ( \ > > > prepend \ > > > + PREFETCHW_ASM(%5) \ > > > "0: lr.w %0, %2\n" \ > > > " and %1, %0, %z4\n" \ > > > " or %1, %1, %z3\n" \ > > > @@ -32,7 +34,7 @@ > > > " bnez %1, 0b\n" \ > > > append \ > > > : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ > > > - : "rJ" (__newx), "rJ" (~__mask) \ > > > + : "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \ > > > : "memory"); \ > > > \ > > > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ > > > diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h > > > index b7b58258f6c7..78b7b8b53778 100644 > > > --- a/arch/riscv/include/asm/hwcap.h > > > +++ b/arch/riscv/include/asm/hwcap.h > > > @@ -58,6 +58,7 @@ > > > #define RISCV_ISA_EXT_ZICSR 40 > > > #define RISCV_ISA_EXT_ZIFENCEI 41 > > > #define RISCV_ISA_EXT_ZIHPM 42 > > > +#define RISCV_ISA_EXT_ZICBOP 43 > > > > > > #define RISCV_ISA_EXT_MAX 64 > > > > > > diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h > > > index 6960beb75f32..dc590d331894 100644 > > > --- a/arch/riscv/include/asm/insn-def.h > > > +++ b/arch/riscv/include/asm/insn-def.h > > > @@ -134,6 +134,7 @@ > > > > > > #define RV_OPCODE_MISC_MEM RV_OPCODE(15) > > > #define RV_OPCODE_SYSTEM RV_OPCODE(115) > > > +#define RV_OPCODE_PREFETCH RV_OPCODE(19) > > > > > > #define HFENCE_VVMA(vaddr, asid) \ > > > INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(17), \ > > > @@ -196,4 +197,8 @@ > > > INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ > > > RS1(base), SIMM12(4)) > > > > > > +#define CBO_prefetchw(base) \ > > > + INSN_R(OPCODE_PREFETCH, FUNC3(6), FUNC7(0), \ > > > + RD(x0), RS1(base), RS2(x0)) > > > + > > > > I understand that here you create the instruction via bitfield, following > > the ISA, and this enables using instructions not available on the > > toolchain. > > > > It took me some time to find the document with this instruction, so please > > add this to the commit msg: > > > > https://github.com/riscv/riscv-CMOs/blob/master/specifications/cmobase-v1.0.pdf > > Page 23. > > > > IIUC, the instruction is "prefetch.w". > > > > Maybe I am missing something, but in the document the rs2 field > > (PREFETCH.W) contains a 0x3, while the above looks to have a 0 instead. > > > > rs2 field = 0x0 would be a prefetch.i (instruction prefetch) instead. > > > > Is the above correct, or am I missing something? > Oh, you are right. My fault, thx for pointing out. It should be: > + INSN_R(OPCODE_PREFETCH, FUNC3(6), FUNC7(0), \ > + RD(x0), RS1(base), RS2(x3)) Now I am curious to check if / how will this impact performance. :) (Please let me know) > > > > > > > Thanks! 
> > Leo > > > > > #endif /* __ASM_INSN_DEF_H */ > > > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h > > > index de9da852f78d..7ad3a24212e8 100644 > > > --- a/arch/riscv/include/asm/processor.h > > > +++ b/arch/riscv/include/asm/processor.h > > > @@ -12,6 +12,8 @@ > > > #include <vdso/processor.h> > > > > > > #include <asm/ptrace.h> > > > +#include <asm/insn-def.h> > > > +#include <asm/hwcap.h> > > > > > > #ifdef CONFIG_64BIT > > > #define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) > > > @@ -103,6 +105,17 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, > > > #define KSTK_EIP(tsk) (ulong)(task_pt_regs(tsk)->epc) > > > #define KSTK_ESP(tsk) (ulong)(task_pt_regs(tsk)->sp) > > > > > > +#define ARCH_HAS_PREFETCHW > > > +#define PREFETCHW_ASM(base) ALTERNATIVE(__nops(1), \ > > > + CBO_prefetchw(base), \ > > > + 0, \ > > > + RISCV_ISA_EXT_ZICBOP, \ > > > + CONFIG_RISCV_ISA_ZICBOP) > > > +static inline void prefetchw(const void *ptr) > > > +{ > > > + asm volatile(PREFETCHW_ASM(%0) > > > + : : "r" (ptr) : "memory"); > > > +} > > > > > > /* Do necessary setup to start up a newly executed thread. */ > > > extern void start_thread(struct pt_regs *regs, > > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c > > > index ef7b4fd9e876..e0b897db0b97 100644 > > > --- a/arch/riscv/kernel/cpufeature.c > > > +++ b/arch/riscv/kernel/cpufeature.c > > > @@ -159,6 +159,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { > > > __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), > > > __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), > > > __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), > > > + __RISCV_ISA_EXT_DATA(zicbop, RISCV_ISA_EXT_ZICBOP), > > > __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), > > > __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), > > > __RISCV_ISA_EXT_DATA(zifencei, RISCV_ISA_EXT_ZIFENCEI), > > > -- > > > 2.36.1 > > > > > > > > -- > Best Regards > Guo Ren >
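A side note on the rs2 fix above: once the encoding is expressed through the assembler's '.insn s' directive that drew showed earlier in the thread, the three Zicbop prefetches differ only in the rs2 operand (x0 for prefetch.i, x1 for prefetch.r, x3 for prefetch.w). A hedged sketch, assuming a binutils new enough to accept '.insn s' with a numeric opcode; the wrapper names are made up for illustration:

/* Illustrative inline-asm wrappers; rs2 picks the CMO flavour. */
static inline void zicbop_prefetch_i(const void *p)
{
	/* rs2 = x0: instruction prefetch */
	asm volatile(".insn s 0x13, 6, x0, 0(%0)" : : "r" (p) : "memory");
}

static inline void zicbop_prefetch_r(const void *p)
{
	/* rs2 = x1: prefetch with read intent */
	asm volatile(".insn s 0x13, 6, x1, 0(%0)" : : "r" (p) : "memory");
}

static inline void zicbop_prefetch_w(void *p)
{
	/* rs2 = x3: prefetch with write intent */
	asm volatile(".insn s 0x13, 6, x3, 0(%0)" : : "r" (p) : "memory");
}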
On Sat, Sep 16, 2023 at 9:25 AM Leonardo Bras <leobras@redhat.com> wrote: > > On Fri, Sep 15, 2023 at 08:36:31PM +0800, Guo Ren wrote: > > On Wed, Sep 13, 2023 at 4:50 PM Leonardo Bras <leobras@redhat.com> wrote: > > > > > > On Sun, Sep 10, 2023 at 04:28:57AM -0400, guoren@kernel.org wrote: > > > > From: Guo Ren <guoren@linux.alibaba.com> > > > > > > > > Cache-block prefetch instructions are HINTs to the hardware to > > > > indicate that software intends to perform a particular type of > > > > memory access in the near future. Enable ARCH_HAS_PREFETCHW and > > > > improve the arch_xchg for qspinlock xchg_tail. > > > > > > > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > > > > Signed-off-by: Guo Ren <guoren@kernel.org> > > > > --- > > > > arch/riscv/Kconfig | 15 +++++++++++++++ > > > > arch/riscv/include/asm/cmpxchg.h | 4 +++- > > > > arch/riscv/include/asm/hwcap.h | 1 + > > > > arch/riscv/include/asm/insn-def.h | 5 +++++ > > > > arch/riscv/include/asm/processor.h | 13 +++++++++++++ > > > > arch/riscv/kernel/cpufeature.c | 1 + > > > > 6 files changed, 38 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > > > index e9ae6fa232c3..2c346fe169c1 100644 > > > > --- a/arch/riscv/Kconfig > > > > +++ b/arch/riscv/Kconfig > > > > @@ -617,6 +617,21 @@ config RISCV_ISA_ZICBOZ > > > > > > > > If you don't know what to do here, say Y. > > > > > > > > +config RISCV_ISA_ZICBOP > > > > + bool "Zicbop extension support for cache block prefetch" > > > > + depends on MMU > > > > + depends on RISCV_ALTERNATIVE > > > > + default y > > > > + help > > > > + Adds support to dynamically detect the presence of the ZICBOP > > > > + extension (Cache Block Prefetch Operations) and enable its > > > > + usage. > > > > + > > > > + The Zicbop extension can be used to prefetch cache block for > > > > + read/write/instruction fetch. > > > > + > > > > + If you don't know what to do here, say Y. 
> > > > + > > > > config TOOLCHAIN_HAS_ZIHINTPAUSE > > > > bool > > > > default y > > > > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > > > > index 702725727671..56eff7a9d2d2 100644 > > > > --- a/arch/riscv/include/asm/cmpxchg.h > > > > +++ b/arch/riscv/include/asm/cmpxchg.h > > > > @@ -11,6 +11,7 @@ > > > > > > > > #include <asm/barrier.h> > > > > #include <asm/fence.h> > > > > +#include <asm/processor.h> > > > > > > > > #define __arch_xchg_masked(prepend, append, r, p, n) \ > > > > ({ \ > > > > @@ -25,6 +26,7 @@ > > > > \ > > > > __asm__ __volatile__ ( \ > > > > prepend \ > > > > + PREFETCHW_ASM(%5) \ > > > > "0: lr.w %0, %2\n" \ > > > > " and %1, %0, %z4\n" \ > > > > " or %1, %1, %z3\n" \ > > > > @@ -32,7 +34,7 @@ > > > > " bnez %1, 0b\n" \ > > > > append \ > > > > : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ > > > > - : "rJ" (__newx), "rJ" (~__mask) \ > > > > + : "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \ > > > > : "memory"); \ > > > > \ > > > > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ > > > > diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h > > > > index b7b58258f6c7..78b7b8b53778 100644 > > > > --- a/arch/riscv/include/asm/hwcap.h > > > > +++ b/arch/riscv/include/asm/hwcap.h > > > > @@ -58,6 +58,7 @@ > > > > #define RISCV_ISA_EXT_ZICSR 40 > > > > #define RISCV_ISA_EXT_ZIFENCEI 41 > > > > #define RISCV_ISA_EXT_ZIHPM 42 > > > > +#define RISCV_ISA_EXT_ZICBOP 43 > > > > > > > > #define RISCV_ISA_EXT_MAX 64 > > > > > > > > diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h > > > > index 6960beb75f32..dc590d331894 100644 > > > > --- a/arch/riscv/include/asm/insn-def.h > > > > +++ b/arch/riscv/include/asm/insn-def.h > > > > @@ -134,6 +134,7 @@ > > > > > > > > #define RV_OPCODE_MISC_MEM RV_OPCODE(15) > > > > #define RV_OPCODE_SYSTEM RV_OPCODE(115) > > > > +#define RV_OPCODE_PREFETCH RV_OPCODE(19) > > > > > > > > #define HFENCE_VVMA(vaddr, asid) \ > > > > INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(17), \ > > > > @@ -196,4 +197,8 @@ > > > > INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ > > > > RS1(base), SIMM12(4)) > > > > > > > > +#define CBO_prefetchw(base) \ > > > > + INSN_R(OPCODE_PREFETCH, FUNC3(6), FUNC7(0), \ > > > > + RD(x0), RS1(base), RS2(x0)) > > > > + > > > > > > I understand that here you create the instruction via bitfield, following > > > the ISA, and this enables using instructions not available on the > > > toolchain. > > > > > > It took me some time to find the document with this instruction, so please > > > add this to the commit msg: > > > > > > https://github.com/riscv/riscv-CMOs/blob/master/specifications/cmobase-v1.0.pdf > > > Page 23. > > > > > > IIUC, the instruction is "prefetch.w". > > > > > > Maybe I am missing something, but in the document the rs2 field > > > (PREFETCH.W) contains a 0x3, while the above looks to have a 0 instead. > > > > > > rs2 field = 0x0 would be a prefetch.i (instruction prefetch) instead. > > > > > > Is the above correct, or am I missing something? > > Oh, you are right. My fault, thx for pointing out. It should be: > > + INSN_R(OPCODE_PREFETCH, FUNC3(6), FUNC7(0), \ > > + RD(x0), RS1(base), RS2(x3)) > > Now I am curious to check if / how will this impact performance. :) > (Please let me know) Ref: commit 0ea366f5e1b6 ("arm64: atomics: prefetch the destination word for write prior to stxr") commit 86d231459d6d ("bpf: cpumap memory prefetchw optimizations for struct page") > > > > > > > > > > > > > Thanks! 
> > > Leo > > > > > > > #endif /* __ASM_INSN_DEF_H */ > > > > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h > > > > index de9da852f78d..7ad3a24212e8 100644 > > > > --- a/arch/riscv/include/asm/processor.h > > > > +++ b/arch/riscv/include/asm/processor.h > > > > @@ -12,6 +12,8 @@ > > > > #include <vdso/processor.h> > > > > > > > > #include <asm/ptrace.h> > > > > +#include <asm/insn-def.h> > > > > +#include <asm/hwcap.h> > > > > > > > > #ifdef CONFIG_64BIT > > > > #define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) > > > > @@ -103,6 +105,17 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, > > > > #define KSTK_EIP(tsk) (ulong)(task_pt_regs(tsk)->epc) > > > > #define KSTK_ESP(tsk) (ulong)(task_pt_regs(tsk)->sp) > > > > > > > > +#define ARCH_HAS_PREFETCHW > > > > +#define PREFETCHW_ASM(base) ALTERNATIVE(__nops(1), \ > > > > + CBO_prefetchw(base), \ > > > > + 0, \ > > > > + RISCV_ISA_EXT_ZICBOP, \ > > > > + CONFIG_RISCV_ISA_ZICBOP) > > > > +static inline void prefetchw(const void *ptr) > > > > +{ > > > > + asm volatile(PREFETCHW_ASM(%0) > > > > + : : "r" (ptr) : "memory"); > > > > +} > > > > > > > > /* Do necessary setup to start up a newly executed thread. */ > > > > extern void start_thread(struct pt_regs *regs, > > > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c > > > > index ef7b4fd9e876..e0b897db0b97 100644 > > > > --- a/arch/riscv/kernel/cpufeature.c > > > > +++ b/arch/riscv/kernel/cpufeature.c > > > > @@ -159,6 +159,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { > > > > __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), > > > > __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), > > > > __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), > > > > + __RISCV_ISA_EXT_DATA(zicbop, RISCV_ISA_EXT_ZICBOP), > > > > __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), > > > > __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), > > > > __RISCV_ISA_EXT_DATA(zifencei, RISCV_ISA_EXT_ZIFENCEI), > > > > -- > > > > 2.36.1 > > > > > > > > > > > > > -- > > Best Regards > > Guo Ren > > >
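The arm64 commit referenced above captures the motivation: prefetch the destination line with write intent before entering an LL/SC retry loop, so the store-conditional is less likely to find the line stolen and fail. Below is a simplified sketch of that pattern on RISC-V, a plain 32-bit exchange rather than the masked sub-word variant in this patch, and using the prefetch.w mnemonic directly, which assumes a Zicbop-aware toolchain instead of the INSN_S encoding the patch relies on:

#include <stdint.h>

/* Sketch only: full-word exchange with a write-intent prefetch ahead of the
 * lr.w/sc.w loop. Barriers and the sub-word masking done by
 * __arch_xchg_masked() are intentionally omitted. */
static inline uint32_t xchg32_prefetched(uint32_t *p, uint32_t newval)
{
	uint32_t old, fail;

	asm volatile(
		"	prefetch.w 0(%2)\n"	/* pull the line in, write intent */
		"0:	lr.w	%0, (%2)\n"	/* load-reserved                  */
		"	sc.w	%1, %3, (%2)\n"	/* store-conditional              */
		"	bnez	%1, 0b\n"	/* retry if the reservation broke */
		: "=&r" (old), "=&r" (fail)
		: "r" (p), "r" (newval)
		: "memory");

	return old;
}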
On Sun, Sep 17, 2023 at 10:34:36PM +0800, Guo Ren wrote: > On Sat, Sep 16, 2023 at 9:25 AM Leonardo Bras <leobras@redhat.com> wrote: > > > > On Fri, Sep 15, 2023 at 08:36:31PM +0800, Guo Ren wrote: > > > On Wed, Sep 13, 2023 at 4:50 PM Leonardo Bras <leobras@redhat.com> wrote: > > > > > > > > On Sun, Sep 10, 2023 at 04:28:57AM -0400, guoren@kernel.org wrote: > > > > > From: Guo Ren <guoren@linux.alibaba.com> > > > > > > > > > > Cache-block prefetch instructions are HINTs to the hardware to > > > > > indicate that software intends to perform a particular type of > > > > > memory access in the near future. Enable ARCH_HAS_PREFETCHW and > > > > > improve the arch_xchg for qspinlock xchg_tail. > > > > > > > > > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > > > > > Signed-off-by: Guo Ren <guoren@kernel.org> > > > > > --- > > > > > arch/riscv/Kconfig | 15 +++++++++++++++ > > > > > arch/riscv/include/asm/cmpxchg.h | 4 +++- > > > > > arch/riscv/include/asm/hwcap.h | 1 + > > > > > arch/riscv/include/asm/insn-def.h | 5 +++++ > > > > > arch/riscv/include/asm/processor.h | 13 +++++++++++++ > > > > > arch/riscv/kernel/cpufeature.c | 1 + > > > > > 6 files changed, 38 insertions(+), 1 deletion(-) > > > > > > > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > > > > index e9ae6fa232c3..2c346fe169c1 100644 > > > > > --- a/arch/riscv/Kconfig > > > > > +++ b/arch/riscv/Kconfig > > > > > @@ -617,6 +617,21 @@ config RISCV_ISA_ZICBOZ > > > > > > > > > > If you don't know what to do here, say Y. > > > > > > > > > > +config RISCV_ISA_ZICBOP > > > > > + bool "Zicbop extension support for cache block prefetch" > > > > > + depends on MMU > > > > > + depends on RISCV_ALTERNATIVE > > > > > + default y > > > > > + help > > > > > + Adds support to dynamically detect the presence of the ZICBOP > > > > > + extension (Cache Block Prefetch Operations) and enable its > > > > > + usage. > > > > > + > > > > > + The Zicbop extension can be used to prefetch cache block for > > > > > + read/write/instruction fetch. > > > > > + > > > > > + If you don't know what to do here, say Y. 
> > > > > + > > > > > config TOOLCHAIN_HAS_ZIHINTPAUSE > > > > > bool > > > > > default y > > > > > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > > > > > index 702725727671..56eff7a9d2d2 100644 > > > > > --- a/arch/riscv/include/asm/cmpxchg.h > > > > > +++ b/arch/riscv/include/asm/cmpxchg.h > > > > > @@ -11,6 +11,7 @@ > > > > > > > > > > #include <asm/barrier.h> > > > > > #include <asm/fence.h> > > > > > +#include <asm/processor.h> > > > > > > > > > > #define __arch_xchg_masked(prepend, append, r, p, n) \ > > > > > ({ \ > > > > > @@ -25,6 +26,7 @@ > > > > > \ > > > > > __asm__ __volatile__ ( \ > > > > > prepend \ > > > > > + PREFETCHW_ASM(%5) \ > > > > > "0: lr.w %0, %2\n" \ > > > > > " and %1, %0, %z4\n" \ > > > > > " or %1, %1, %z3\n" \ > > > > > @@ -32,7 +34,7 @@ > > > > > " bnez %1, 0b\n" \ > > > > > append \ > > > > > : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ > > > > > - : "rJ" (__newx), "rJ" (~__mask) \ > > > > > + : "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \ > > > > > : "memory"); \ > > > > > \ > > > > > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ > > > > > diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h > > > > > index b7b58258f6c7..78b7b8b53778 100644 > > > > > --- a/arch/riscv/include/asm/hwcap.h > > > > > +++ b/arch/riscv/include/asm/hwcap.h > > > > > @@ -58,6 +58,7 @@ > > > > > #define RISCV_ISA_EXT_ZICSR 40 > > > > > #define RISCV_ISA_EXT_ZIFENCEI 41 > > > > > #define RISCV_ISA_EXT_ZIHPM 42 > > > > > +#define RISCV_ISA_EXT_ZICBOP 43 > > > > > > > > > > #define RISCV_ISA_EXT_MAX 64 > > > > > > > > > > diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h > > > > > index 6960beb75f32..dc590d331894 100644 > > > > > --- a/arch/riscv/include/asm/insn-def.h > > > > > +++ b/arch/riscv/include/asm/insn-def.h > > > > > @@ -134,6 +134,7 @@ > > > > > > > > > > #define RV_OPCODE_MISC_MEM RV_OPCODE(15) > > > > > #define RV_OPCODE_SYSTEM RV_OPCODE(115) > > > > > +#define RV_OPCODE_PREFETCH RV_OPCODE(19) > > > > > > > > > > #define HFENCE_VVMA(vaddr, asid) \ > > > > > INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(17), \ > > > > > @@ -196,4 +197,8 @@ > > > > > INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ > > > > > RS1(base), SIMM12(4)) > > > > > > > > > > +#define CBO_prefetchw(base) \ > > > > > + INSN_R(OPCODE_PREFETCH, FUNC3(6), FUNC7(0), \ > > > > > + RD(x0), RS1(base), RS2(x0)) > > > > > + > > > > > > > > I understand that here you create the instruction via bitfield, following > > > > the ISA, and this enables using instructions not available on the > > > > toolchain. > > > > > > > > It took me some time to find the document with this instruction, so please > > > > add this to the commit msg: > > > > > > > > https://github.com/riscv/riscv-CMOs/blob/master/specifications/cmobase-v1.0.pdf > > > > Page 23. > > > > > > > > IIUC, the instruction is "prefetch.w". > > > > > > > > Maybe I am missing something, but in the document the rs2 field > > > > (PREFETCH.W) contains a 0x3, while the above looks to have a 0 instead. > > > > > > > > rs2 field = 0x0 would be a prefetch.i (instruction prefetch) instead. > > > > > > > > Is the above correct, or am I missing something? > > > Oh, you are right. My fault, thx for pointing out. It should be: > > > + INSN_R(OPCODE_PREFETCH, FUNC3(6), FUNC7(0), \ > > > + RD(x0), RS1(base), RS2(x3)) > > > > Now I am curious to check if / how will this impact performance. 
:) > > (Please let me know) > Ref: > commit 0ea366f5e1b6 ("arm64: atomics: prefetch the destination word > for write prior to stxr") > commit 86d231459d6d ("bpf: cpumap memory prefetchw optimizations for > struct page") Oh, I understand that prefetch.w is very useful for performance :) What I meant is that previously this patch was issuing a prefetch.i, and now it's issuing a prefetch.w (as intended). What got me curious is how much would it impact the performance to change the prefetch.i to prefetch.w. :) Thanks! Leo > > > > > > > > > > > > > > > > > > > > Thanks! > > > > Leo > > > > > > > > > #endif /* __ASM_INSN_DEF_H */ > > > > > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h > > > > > index de9da852f78d..7ad3a24212e8 100644 > > > > > --- a/arch/riscv/include/asm/processor.h > > > > > +++ b/arch/riscv/include/asm/processor.h > > > > > @@ -12,6 +12,8 @@ > > > > > #include <vdso/processor.h> > > > > > > > > > > #include <asm/ptrace.h> > > > > > +#include <asm/insn-def.h> > > > > > +#include <asm/hwcap.h> > > > > > > > > > > #ifdef CONFIG_64BIT > > > > > #define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) > > > > > @@ -103,6 +105,17 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, > > > > > #define KSTK_EIP(tsk) (ulong)(task_pt_regs(tsk)->epc) > > > > > #define KSTK_ESP(tsk) (ulong)(task_pt_regs(tsk)->sp) > > > > > > > > > > +#define ARCH_HAS_PREFETCHW > > > > > +#define PREFETCHW_ASM(base) ALTERNATIVE(__nops(1), \ > > > > > + CBO_prefetchw(base), \ > > > > > + 0, \ > > > > > + RISCV_ISA_EXT_ZICBOP, \ > > > > > + CONFIG_RISCV_ISA_ZICBOP) > > > > > +static inline void prefetchw(const void *ptr) > > > > > +{ > > > > > + asm volatile(PREFETCHW_ASM(%0) > > > > > + : : "r" (ptr) : "memory"); > > > > > +} > > > > > > > > > > /* Do necessary setup to start up a newly executed thread. */ > > > > > extern void start_thread(struct pt_regs *regs, > > > > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c > > > > > index ef7b4fd9e876..e0b897db0b97 100644 > > > > > --- a/arch/riscv/kernel/cpufeature.c > > > > > +++ b/arch/riscv/kernel/cpufeature.c > > > > > @@ -159,6 +159,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { > > > > > __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), > > > > > __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), > > > > > __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), > > > > > + __RISCV_ISA_EXT_DATA(zicbop, RISCV_ISA_EXT_ZICBOP), > > > > > __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), > > > > > __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), > > > > > __RISCV_ISA_EXT_DATA(zifencei, RISCV_ISA_EXT_ZIFENCEI), > > > > > -- > > > > > 2.36.1 > > > > > > > > > > > > > > > > > > -- > > > Best Regards > > > Guo Ren > > > > > > > > -- > Best Regards > Guo Ren >
On Tue, Sep 19, 2023 at 1:13 PM Leonardo Bras <leobras@redhat.com> wrote: > > On Sun, Sep 17, 2023 at 10:34:36PM +0800, Guo Ren wrote: > > On Sat, Sep 16, 2023 at 9:25 AM Leonardo Bras <leobras@redhat.com> wrote: > > > > > > On Fri, Sep 15, 2023 at 08:36:31PM +0800, Guo Ren wrote: > > > > On Wed, Sep 13, 2023 at 4:50 PM Leonardo Bras <leobras@redhat.com> wrote: > > > > > > > > > > On Sun, Sep 10, 2023 at 04:28:57AM -0400, guoren@kernel.org wrote: > > > > > > From: Guo Ren <guoren@linux.alibaba.com> > > > > > > > > > > > > Cache-block prefetch instructions are HINTs to the hardware to > > > > > > indicate that software intends to perform a particular type of > > > > > > memory access in the near future. Enable ARCH_HAS_PREFETCHW and > > > > > > improve the arch_xchg for qspinlock xchg_tail. > > > > > > > > > > > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > > > > > > Signed-off-by: Guo Ren <guoren@kernel.org> > > > > > > --- > > > > > > arch/riscv/Kconfig | 15 +++++++++++++++ > > > > > > arch/riscv/include/asm/cmpxchg.h | 4 +++- > > > > > > arch/riscv/include/asm/hwcap.h | 1 + > > > > > > arch/riscv/include/asm/insn-def.h | 5 +++++ > > > > > > arch/riscv/include/asm/processor.h | 13 +++++++++++++ > > > > > > arch/riscv/kernel/cpufeature.c | 1 + > > > > > > 6 files changed, 38 insertions(+), 1 deletion(-) > > > > > > > > > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > > > > > index e9ae6fa232c3..2c346fe169c1 100644 > > > > > > --- a/arch/riscv/Kconfig > > > > > > +++ b/arch/riscv/Kconfig > > > > > > @@ -617,6 +617,21 @@ config RISCV_ISA_ZICBOZ > > > > > > > > > > > > If you don't know what to do here, say Y. > > > > > > > > > > > > +config RISCV_ISA_ZICBOP > > > > > > + bool "Zicbop extension support for cache block prefetch" > > > > > > + depends on MMU > > > > > > + depends on RISCV_ALTERNATIVE > > > > > > + default y > > > > > > + help > > > > > > + Adds support to dynamically detect the presence of the ZICBOP > > > > > > + extension (Cache Block Prefetch Operations) and enable its > > > > > > + usage. > > > > > > + > > > > > > + The Zicbop extension can be used to prefetch cache block for > > > > > > + read/write/instruction fetch. > > > > > > + > > > > > > + If you don't know what to do here, say Y. 
> > > > > > + > > > > > > config TOOLCHAIN_HAS_ZIHINTPAUSE > > > > > > bool > > > > > > default y > > > > > > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > > > > > > index 702725727671..56eff7a9d2d2 100644 > > > > > > --- a/arch/riscv/include/asm/cmpxchg.h > > > > > > +++ b/arch/riscv/include/asm/cmpxchg.h > > > > > > @@ -11,6 +11,7 @@ > > > > > > > > > > > > #include <asm/barrier.h> > > > > > > #include <asm/fence.h> > > > > > > +#include <asm/processor.h> > > > > > > > > > > > > #define __arch_xchg_masked(prepend, append, r, p, n) \ > > > > > > ({ \ > > > > > > @@ -25,6 +26,7 @@ > > > > > > \ > > > > > > __asm__ __volatile__ ( \ > > > > > > prepend \ > > > > > > + PREFETCHW_ASM(%5) \ > > > > > > "0: lr.w %0, %2\n" \ > > > > > > " and %1, %0, %z4\n" \ > > > > > > " or %1, %1, %z3\n" \ > > > > > > @@ -32,7 +34,7 @@ > > > > > > " bnez %1, 0b\n" \ > > > > > > append \ > > > > > > : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ > > > > > > - : "rJ" (__newx), "rJ" (~__mask) \ > > > > > > + : "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \ > > > > > > : "memory"); \ > > > > > > \ > > > > > > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ > > > > > > diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h > > > > > > index b7b58258f6c7..78b7b8b53778 100644 > > > > > > --- a/arch/riscv/include/asm/hwcap.h > > > > > > +++ b/arch/riscv/include/asm/hwcap.h > > > > > > @@ -58,6 +58,7 @@ > > > > > > #define RISCV_ISA_EXT_ZICSR 40 > > > > > > #define RISCV_ISA_EXT_ZIFENCEI 41 > > > > > > #define RISCV_ISA_EXT_ZIHPM 42 > > > > > > +#define RISCV_ISA_EXT_ZICBOP 43 > > > > > > > > > > > > #define RISCV_ISA_EXT_MAX 64 > > > > > > > > > > > > diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h > > > > > > index 6960beb75f32..dc590d331894 100644 > > > > > > --- a/arch/riscv/include/asm/insn-def.h > > > > > > +++ b/arch/riscv/include/asm/insn-def.h > > > > > > @@ -134,6 +134,7 @@ > > > > > > > > > > > > #define RV_OPCODE_MISC_MEM RV_OPCODE(15) > > > > > > #define RV_OPCODE_SYSTEM RV_OPCODE(115) > > > > > > +#define RV_OPCODE_PREFETCH RV_OPCODE(19) > > > > > > > > > > > > #define HFENCE_VVMA(vaddr, asid) \ > > > > > > INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(17), \ > > > > > > @@ -196,4 +197,8 @@ > > > > > > INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ > > > > > > RS1(base), SIMM12(4)) > > > > > > > > > > > > +#define CBO_prefetchw(base) \ > > > > > > + INSN_R(OPCODE_PREFETCH, FUNC3(6), FUNC7(0), \ > > > > > > + RD(x0), RS1(base), RS2(x0)) > > > > > > + > > > > > > > > > > I understand that here you create the instruction via bitfield, following > > > > > the ISA, and this enables using instructions not available on the > > > > > toolchain. > > > > > > > > > > It took me some time to find the document with this instruction, so please > > > > > add this to the commit msg: > > > > > > > > > > https://github.com/riscv/riscv-CMOs/blob/master/specifications/cmobase-v1.0.pdf > > > > > Page 23. > > > > > > > > > > IIUC, the instruction is "prefetch.w". > > > > > > > > > > Maybe I am missing something, but in the document the rs2 field > > > > > (PREFETCH.W) contains a 0x3, while the above looks to have a 0 instead. > > > > > > > > > > rs2 field = 0x0 would be a prefetch.i (instruction prefetch) instead. > > > > > > > > > > Is the above correct, or am I missing something? > > > > Oh, you are right. My fault, thx for pointing out. 
It should be: > > > > + INSN_R(OPCODE_PREFETCH, FUNC3(6), FUNC7(0), \ > > > > + RD(x0), RS1(base), RS2(x3)) > > > > > > Now I am curious to check if / how will this impact performance. :) > > > (Please let me know) > > Ref: > > commit 0ea366f5e1b6 ("arm64: atomics: prefetch the destination word > > for write prior to stxr") > > commit 86d231459d6d ("bpf: cpumap memory prefetchw optimizations for > > struct page") > > Oh, I understand that prefetch.w is very useful for performance :) > > What I meant is that previously this patch was issuing a prefetch.i, > and now it's issuing a prefetch.w (as intended). > > What got me curious is how much would it impact the performance to change > the prefetch.i to prefetch.w. :) The current SOPHO sg2042 hardware platform didn't support prefetch.w instruction. So there is no performance result I could share with you. Our next generation of processors would support ZICBOP. > > Thanks! > Leo > > > > > > > > > > > > > > > > > > > > > > > > > > > > > Thanks! > > > > > Leo > > > > > > > > > > > #endif /* __ASM_INSN_DEF_H */ > > > > > > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h > > > > > > index de9da852f78d..7ad3a24212e8 100644 > > > > > > --- a/arch/riscv/include/asm/processor.h > > > > > > +++ b/arch/riscv/include/asm/processor.h > > > > > > @@ -12,6 +12,8 @@ > > > > > > #include <vdso/processor.h> > > > > > > > > > > > > #include <asm/ptrace.h> > > > > > > +#include <asm/insn-def.h> > > > > > > +#include <asm/hwcap.h> > > > > > > > > > > > > #ifdef CONFIG_64BIT > > > > > > #define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) > > > > > > @@ -103,6 +105,17 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, > > > > > > #define KSTK_EIP(tsk) (ulong)(task_pt_regs(tsk)->epc) > > > > > > #define KSTK_ESP(tsk) (ulong)(task_pt_regs(tsk)->sp) > > > > > > > > > > > > +#define ARCH_HAS_PREFETCHW > > > > > > +#define PREFETCHW_ASM(base) ALTERNATIVE(__nops(1), \ > > > > > > + CBO_prefetchw(base), \ > > > > > > + 0, \ > > > > > > + RISCV_ISA_EXT_ZICBOP, \ > > > > > > + CONFIG_RISCV_ISA_ZICBOP) > > > > > > +static inline void prefetchw(const void *ptr) > > > > > > +{ > > > > > > + asm volatile(PREFETCHW_ASM(%0) > > > > > > + : : "r" (ptr) : "memory"); > > > > > > +} > > > > > > > > > > > > /* Do necessary setup to start up a newly executed thread. */ > > > > > > extern void start_thread(struct pt_regs *regs, > > > > > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c > > > > > > index ef7b4fd9e876..e0b897db0b97 100644 > > > > > > --- a/arch/riscv/kernel/cpufeature.c > > > > > > +++ b/arch/riscv/kernel/cpufeature.c > > > > > > @@ -159,6 +159,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { > > > > > > __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), > > > > > > __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), > > > > > > __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), > > > > > > + __RISCV_ISA_EXT_DATA(zicbop, RISCV_ISA_EXT_ZICBOP), > > > > > > __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), > > > > > > __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), > > > > > > __RISCV_ISA_EXT_DATA(zifencei, RISCV_ISA_EXT_ZIFENCEI), > > > > > > -- > > > > > > 2.36.1 > > > > > > > > > > > > > > > > > > > > > > > -- > > > > Best Regards > > > > Guo Ren > > > > > > > > > > > > > -- > > Best Regards > > Guo Ren > > >
On Tue, Sep 19, 2023 at 03:53:22PM +0800, Guo Ren wrote: > On Tue, Sep 19, 2023 at 1:13 PM Leonardo Bras <leobras@redhat.com> wrote: > > > > On Sun, Sep 17, 2023 at 10:34:36PM +0800, Guo Ren wrote: > > > On Sat, Sep 16, 2023 at 9:25 AM Leonardo Bras <leobras@redhat.com> wrote: > > > > > > > > On Fri, Sep 15, 2023 at 08:36:31PM +0800, Guo Ren wrote: > > > > > On Wed, Sep 13, 2023 at 4:50 PM Leonardo Bras <leobras@redhat.com> wrote: > > > > > > > > > > > > On Sun, Sep 10, 2023 at 04:28:57AM -0400, guoren@kernel.org wrote: > > > > > > > From: Guo Ren <guoren@linux.alibaba.com> > > > > > > > > > > > > > > Cache-block prefetch instructions are HINTs to the hardware to > > > > > > > indicate that software intends to perform a particular type of > > > > > > > memory access in the near future. Enable ARCH_HAS_PREFETCHW and > > > > > > > improve the arch_xchg for qspinlock xchg_tail. > > > > > > > > > > > > > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > > > > > > > Signed-off-by: Guo Ren <guoren@kernel.org> > > > > > > > --- > > > > > > > arch/riscv/Kconfig | 15 +++++++++++++++ > > > > > > > arch/riscv/include/asm/cmpxchg.h | 4 +++- > > > > > > > arch/riscv/include/asm/hwcap.h | 1 + > > > > > > > arch/riscv/include/asm/insn-def.h | 5 +++++ > > > > > > > arch/riscv/include/asm/processor.h | 13 +++++++++++++ > > > > > > > arch/riscv/kernel/cpufeature.c | 1 + > > > > > > > 6 files changed, 38 insertions(+), 1 deletion(-) > > > > > > > > > > > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > > > > > > index e9ae6fa232c3..2c346fe169c1 100644 > > > > > > > --- a/arch/riscv/Kconfig > > > > > > > +++ b/arch/riscv/Kconfig > > > > > > > @@ -617,6 +617,21 @@ config RISCV_ISA_ZICBOZ > > > > > > > > > > > > > > If you don't know what to do here, say Y. > > > > > > > > > > > > > > +config RISCV_ISA_ZICBOP > > > > > > > + bool "Zicbop extension support for cache block prefetch" > > > > > > > + depends on MMU > > > > > > > + depends on RISCV_ALTERNATIVE > > > > > > > + default y > > > > > > > + help > > > > > > > + Adds support to dynamically detect the presence of the ZICBOP > > > > > > > + extension (Cache Block Prefetch Operations) and enable its > > > > > > > + usage. > > > > > > > + > > > > > > > + The Zicbop extension can be used to prefetch cache block for > > > > > > > + read/write/instruction fetch. > > > > > > > + > > > > > > > + If you don't know what to do here, say Y. 
> > > > > > > + > > > > > > > config TOOLCHAIN_HAS_ZIHINTPAUSE > > > > > > > bool > > > > > > > default y > > > > > > > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > > > > > > > index 702725727671..56eff7a9d2d2 100644 > > > > > > > --- a/arch/riscv/include/asm/cmpxchg.h > > > > > > > +++ b/arch/riscv/include/asm/cmpxchg.h > > > > > > > @@ -11,6 +11,7 @@ > > > > > > > > > > > > > > #include <asm/barrier.h> > > > > > > > #include <asm/fence.h> > > > > > > > +#include <asm/processor.h> > > > > > > > > > > > > > > #define __arch_xchg_masked(prepend, append, r, p, n) \ > > > > > > > ({ \ > > > > > > > @@ -25,6 +26,7 @@ > > > > > > > \ > > > > > > > __asm__ __volatile__ ( \ > > > > > > > prepend \ > > > > > > > + PREFETCHW_ASM(%5) \ > > > > > > > "0: lr.w %0, %2\n" \ > > > > > > > " and %1, %0, %z4\n" \ > > > > > > > " or %1, %1, %z3\n" \ > > > > > > > @@ -32,7 +34,7 @@ > > > > > > > " bnez %1, 0b\n" \ > > > > > > > append \ > > > > > > > : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ > > > > > > > - : "rJ" (__newx), "rJ" (~__mask) \ > > > > > > > + : "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \ > > > > > > > : "memory"); \ > > > > > > > \ > > > > > > > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ > > > > > > > diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h > > > > > > > index b7b58258f6c7..78b7b8b53778 100644 > > > > > > > --- a/arch/riscv/include/asm/hwcap.h > > > > > > > +++ b/arch/riscv/include/asm/hwcap.h > > > > > > > @@ -58,6 +58,7 @@ > > > > > > > #define RISCV_ISA_EXT_ZICSR 40 > > > > > > > #define RISCV_ISA_EXT_ZIFENCEI 41 > > > > > > > #define RISCV_ISA_EXT_ZIHPM 42 > > > > > > > +#define RISCV_ISA_EXT_ZICBOP 43 > > > > > > > > > > > > > > #define RISCV_ISA_EXT_MAX 64 > > > > > > > > > > > > > > diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h > > > > > > > index 6960beb75f32..dc590d331894 100644 > > > > > > > --- a/arch/riscv/include/asm/insn-def.h > > > > > > > +++ b/arch/riscv/include/asm/insn-def.h > > > > > > > @@ -134,6 +134,7 @@ > > > > > > > > > > > > > > #define RV_OPCODE_MISC_MEM RV_OPCODE(15) > > > > > > > #define RV_OPCODE_SYSTEM RV_OPCODE(115) > > > > > > > +#define RV_OPCODE_PREFETCH RV_OPCODE(19) > > > > > > > > > > > > > > #define HFENCE_VVMA(vaddr, asid) \ > > > > > > > INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(17), \ > > > > > > > @@ -196,4 +197,8 @@ > > > > > > > INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ > > > > > > > RS1(base), SIMM12(4)) > > > > > > > > > > > > > > +#define CBO_prefetchw(base) \ > > > > > > > + INSN_R(OPCODE_PREFETCH, FUNC3(6), FUNC7(0), \ > > > > > > > + RD(x0), RS1(base), RS2(x0)) > > > > > > > + > > > > > > > > > > > > I understand that here you create the instruction via bitfield, following > > > > > > the ISA, and this enables using instructions not available on the > > > > > > toolchain. > > > > > > > > > > > > It took me some time to find the document with this instruction, so please > > > > > > add this to the commit msg: > > > > > > > > > > > > https://github.com/riscv/riscv-CMOs/blob/master/specifications/cmobase-v1.0.pdf > > > > > > Page 23. > > > > > > > > > > > > IIUC, the instruction is "prefetch.w". > > > > > > > > > > > > Maybe I am missing something, but in the document the rs2 field > > > > > > (PREFETCH.W) contains a 0x3, while the above looks to have a 0 instead. > > > > > > > > > > > > rs2 field = 0x0 would be a prefetch.i (instruction prefetch) instead. 
> > > > > > > > > > > > Is the above correct, or am I missing something? > > > > > Oh, you are right. My fault, thx for pointing out. It should be: > > > > > + INSN_R(OPCODE_PREFETCH, FUNC3(6), FUNC7(0), \ > > > > > + RD(x0), RS1(base), RS2(x3)) > > > > > > > > Now I am curious to check if / how will this impact performance. :) > > > > (Please let me know) > > > Ref: > > > commit 0ea366f5e1b6 ("arm64: atomics: prefetch the destination word > > > for write prior to stxr") > > > commit 86d231459d6d ("bpf: cpumap memory prefetchw optimizations for > > > struct page") > > > > Oh, I understand that prefetch.w is very useful for performance :) > > > > What I meant is that previously this patch was issuing a prefetch.i, > > and now it's issuing a prefetch.w (as intended). > > > > What got me curious is how much would it impact the performance to change > > the prefetch.i to prefetch.w. :) > The current SOPHO sg2042 hardware platform didn't support prefetch.w > instruction. So there is no performance result I could share with you. > > Our next generation of processors would support ZICBOP. Oh, okay then. Thanks for sharing this info! Leo > > > > > Thanks! > > Leo > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > Thanks! > > > > > > Leo > > > > > > > > > > > > > #endif /* __ASM_INSN_DEF_H */ > > > > > > > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h > > > > > > > index de9da852f78d..7ad3a24212e8 100644 > > > > > > > --- a/arch/riscv/include/asm/processor.h > > > > > > > +++ b/arch/riscv/include/asm/processor.h > > > > > > > @@ -12,6 +12,8 @@ > > > > > > > #include <vdso/processor.h> > > > > > > > > > > > > > > #include <asm/ptrace.h> > > > > > > > +#include <asm/insn-def.h> > > > > > > > +#include <asm/hwcap.h> > > > > > > > > > > > > > > #ifdef CONFIG_64BIT > > > > > > > #define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) > > > > > > > @@ -103,6 +105,17 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, > > > > > > > #define KSTK_EIP(tsk) (ulong)(task_pt_regs(tsk)->epc) > > > > > > > #define KSTK_ESP(tsk) (ulong)(task_pt_regs(tsk)->sp) > > > > > > > > > > > > > > +#define ARCH_HAS_PREFETCHW > > > > > > > +#define PREFETCHW_ASM(base) ALTERNATIVE(__nops(1), \ > > > > > > > + CBO_prefetchw(base), \ > > > > > > > + 0, \ > > > > > > > + RISCV_ISA_EXT_ZICBOP, \ > > > > > > > + CONFIG_RISCV_ISA_ZICBOP) > > > > > > > +static inline void prefetchw(const void *ptr) > > > > > > > +{ > > > > > > > + asm volatile(PREFETCHW_ASM(%0) > > > > > > > + : : "r" (ptr) : "memory"); > > > > > > > +} > > > > > > > > > > > > > > /* Do necessary setup to start up a newly executed thread. 
*/ > > > > > > > extern void start_thread(struct pt_regs *regs, > > > > > > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c > > > > > > > index ef7b4fd9e876..e0b897db0b97 100644 > > > > > > > --- a/arch/riscv/kernel/cpufeature.c > > > > > > > +++ b/arch/riscv/kernel/cpufeature.c > > > > > > > @@ -159,6 +159,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { > > > > > > > __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), > > > > > > > __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), > > > > > > > __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), > > > > > > > + __RISCV_ISA_EXT_DATA(zicbop, RISCV_ISA_EXT_ZICBOP), > > > > > > > __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), > > > > > > > __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), > > > > > > > __RISCV_ISA_EXT_DATA(zifencei, RISCV_ISA_EXT_ZIFENCEI), > > > > > > > -- > > > > > > > 2.36.1 > > > > > > > > > > > > > > > > > > > > > > > > > > > > -- > > > > > Best Regards > > > > > Guo Ren > > > > > > > > > > > > > > > > > > -- > > > Best Regards > > > Guo Ren > > > > > > > > -- > Best Regards > Guo Ren >
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index e9ae6fa232c3..2c346fe169c1 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -617,6 +617,21 @@ config RISCV_ISA_ZICBOZ If you don't know what to do here, say Y. +config RISCV_ISA_ZICBOP + bool "Zicbop extension support for cache block prefetch" + depends on MMU + depends on RISCV_ALTERNATIVE + default y + help + Adds support to dynamically detect the presence of the ZICBOP + extension (Cache Block Prefetch Operations) and enable its + usage. + + The Zicbop extension can be used to prefetch cache block for + read/write/instruction fetch. + + If you don't know what to do here, say Y. + config TOOLCHAIN_HAS_ZIHINTPAUSE bool default y diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 702725727671..56eff7a9d2d2 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -11,6 +11,7 @@ #include <asm/barrier.h> #include <asm/fence.h> +#include <asm/processor.h> #define __arch_xchg_masked(prepend, append, r, p, n) \ ({ \ @@ -25,6 +26,7 @@ \ __asm__ __volatile__ ( \ prepend \ + PREFETCHW_ASM(%5) \ "0: lr.w %0, %2\n" \ " and %1, %0, %z4\n" \ " or %1, %1, %z3\n" \ @@ -32,7 +34,7 @@ " bnez %1, 0b\n" \ append \ : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ - : "rJ" (__newx), "rJ" (~__mask) \ + : "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \ : "memory"); \ \ r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index b7b58258f6c7..78b7b8b53778 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -58,6 +58,7 @@ #define RISCV_ISA_EXT_ZICSR 40 #define RISCV_ISA_EXT_ZIFENCEI 41 #define RISCV_ISA_EXT_ZIHPM 42 +#define RISCV_ISA_EXT_ZICBOP 43 #define RISCV_ISA_EXT_MAX 64 diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h index 6960beb75f32..dc590d331894 100644 --- a/arch/riscv/include/asm/insn-def.h +++ b/arch/riscv/include/asm/insn-def.h @@ -134,6 +134,7 @@ #define RV_OPCODE_MISC_MEM RV_OPCODE(15) #define RV_OPCODE_SYSTEM RV_OPCODE(115) +#define RV_OPCODE_PREFETCH RV_OPCODE(19) #define HFENCE_VVMA(vaddr, asid) \ INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(17), \ @@ -196,4 +197,8 @@ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \ RS1(base), SIMM12(4)) +#define CBO_prefetchw(base) \ + INSN_R(OPCODE_PREFETCH, FUNC3(6), FUNC7(0), \ + RD(x0), RS1(base), RS2(x0)) + #endif /* __ASM_INSN_DEF_H */ diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index de9da852f78d..7ad3a24212e8 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -12,6 +12,8 @@ #include <vdso/processor.h> #include <asm/ptrace.h> +#include <asm/insn-def.h> +#include <asm/hwcap.h> #ifdef CONFIG_64BIT #define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) @@ -103,6 +105,17 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, #define KSTK_EIP(tsk) (ulong)(task_pt_regs(tsk)->epc) #define KSTK_ESP(tsk) (ulong)(task_pt_regs(tsk)->sp) +#define ARCH_HAS_PREFETCHW +#define PREFETCHW_ASM(base) ALTERNATIVE(__nops(1), \ + CBO_prefetchw(base), \ + 0, \ + RISCV_ISA_EXT_ZICBOP, \ + CONFIG_RISCV_ISA_ZICBOP) +static inline void prefetchw(const void *ptr) +{ + asm volatile(PREFETCHW_ASM(%0) + : : "r" (ptr) : "memory"); +} /* Do necessary setup to start up a newly executed thread. 
*/ extern void start_thread(struct pt_regs *regs, diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index ef7b4fd9e876..e0b897db0b97 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -159,6 +159,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), + __RISCV_ISA_EXT_DATA(zicbop, RISCV_ISA_EXT_ZICBOP), __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), __RISCV_ISA_EXT_DATA(zifencei, RISCV_ISA_EXT_ZIFENCEI),
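Beyond the xchg_tail path, ARCH_HAS_PREFETCHW means generic kernel code gets a real prefetchw() on RISC-V instead of the empty fallback, so the usual pattern of prefetching a line with write intent ahead of a read-modify-write becomes available. A hypothetical caller, with a made-up structure purely for illustration:

#include <linux/prefetch.h>
#include <linux/atomic.h>

struct stats_cell {			/* hypothetical example structure */
	atomic_t hits;
};

static void stats_account(struct stats_cell *cell)
{
	prefetchw(&cell->hits);		/* line arrives in a writable state     */
	atomic_inc(&cell->hits);	/* the RMW starts from a writable line  */
}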