Message ID | 20231112095244.4015351-1-xiao.w.wang@intel.com (mailing list archive)
---|---
State | Accepted |
Commit | 55ca8d7aa2af3ebdb6f85cccf1b0703d031c1678 |
Series | riscv: Optimize hweight API with Zbb extension
On Sun, Nov 12, 2023 at 05:52:44PM +0800, Xiao Wang wrote:
> The Hamming Weight of a number is the total number of bits set in it, so
> the cpop/cpopw instruction from Zbb extension can be used to accelerate
> hweight() API.
>
> Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> ---
>  arch/riscv/include/asm/arch_hweight.h | 78 +++++++++++++++++++++++++++
>  arch/riscv/include/asm/bitops.h       |  4 +-
>  2 files changed, 81 insertions(+), 1 deletion(-)
>  create mode 100644 arch/riscv/include/asm/arch_hweight.h
>
> diff --git a/arch/riscv/include/asm/arch_hweight.h b/arch/riscv/include/asm/arch_hweight.h
> new file mode 100644
> index 000000000000..c20236a0725b
> --- /dev/null
> +++ b/arch/riscv/include/asm/arch_hweight.h
> @@ -0,0 +1,78 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Based on arch/x86/include/asm/arch_hweight.h
> + */
> +
> +#ifndef _ASM_RISCV_HWEIGHT_H
> +#define _ASM_RISCV_HWEIGHT_H
> +
> +#include <asm/alternative-macros.h>
> +#include <asm/hwcap.h>
> +
> +#if (BITS_PER_LONG == 64)
> +#define CPOPW	"cpopw "
> +#elif (BITS_PER_LONG == 32)
> +#define CPOPW	"cpop "
> +#else
> +#error "Unexpected BITS_PER_LONG"
> +#endif
> +
> +static __always_inline unsigned int __arch_hweight32(unsigned int w)
> +{
> +#ifdef CONFIG_RISCV_ISA_ZBB
> +	asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
> +				      RISCV_ISA_EXT_ZBB, 1)
> +			  : : : : legacy);
> +
> +	asm (".option push\n"
> +	     ".option arch,+zbb\n"
> +	     CPOPW "%0, %0\n"
> +	     ".option pop\n"
> +	     : "+r" (w) : :);
> +
> +	return w;
> +
> +legacy:
> +#endif
> +	return __sw_hweight32(w);
> +}
> +
> +static inline unsigned int __arch_hweight16(unsigned int w)
> +{
> +	return __arch_hweight32(w & 0xffff);
> +}
> +
> +static inline unsigned int __arch_hweight8(unsigned int w)
> +{
> +	return __arch_hweight32(w & 0xff);
> +}
> +
> +#if BITS_PER_LONG == 64
> +static __always_inline unsigned long __arch_hweight64(__u64 w)
> +{
> +# ifdef CONFIG_RISCV_ISA_ZBB
> +	asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
> +				      RISCV_ISA_EXT_ZBB, 1)
> +			  : : : : legacy);
> +
> +	asm (".option push\n"
> +	     ".option arch,+zbb\n"
> +	     "cpop %0, %0\n"
> +	     ".option pop\n"
> +	     : "+r" (w) : :);
> +
> +	return w;
> +
> +legacy:
> +# endif
> +	return __sw_hweight64(w);
> +}
> +#else /* BITS_PER_LONG == 64 */
> +static inline unsigned long __arch_hweight64(__u64 w)
> +{
> +	return __arch_hweight32((u32)w) +
> +	       __arch_hweight32((u32)(w >> 32));
> +}
> +#endif /* !(BITS_PER_LONG == 64) */
> +
> +#endif /* _ASM_RISCV_HWEIGHT_H */
> diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
> index b212c2708cda..f7c167646460 100644
> --- a/arch/riscv/include/asm/bitops.h
> +++ b/arch/riscv/include/asm/bitops.h
> @@ -271,7 +271,9 @@ static __always_inline int variable_fls(unsigned int x)
>  #include <asm-generic/bitops/fls64.h>
>  #include <asm-generic/bitops/sched.h>
>
> -#include <asm-generic/bitops/hweight.h>
> +#include <asm/arch_hweight.h>
> +
> +#include <asm-generic/bitops/const_hweight.h>
>
>  #if (BITS_PER_LONG == 64)
>  #define __AMO(op) "amo" #op ".d"
> --
> 2.25.1

Reviewed-by: Charlie Jenkins <charlie@rivosinc.com>
Hello:

This patch was applied to riscv/linux.git (fixes)
by Palmer Dabbelt <palmer@rivosinc.com>:

On Sun, 12 Nov 2023 17:52:44 +0800 you wrote:
> The Hamming Weight of a number is the total number of bits set in it, so
> the cpop/cpopw instruction from Zbb extension can be used to accelerate
> hweight() API.
>
> Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
> ---
>  arch/riscv/include/asm/arch_hweight.h | 78 +++++++++++++++++++++++++++
>  arch/riscv/include/asm/bitops.h       |  4 +-
>  2 files changed, 81 insertions(+), 1 deletion(-)
>  create mode 100644 arch/riscv/include/asm/arch_hweight.h

Here is the summary with links:
  - riscv: Optimize hweight API with Zbb extension
    https://git.kernel.org/riscv/c/55ca8d7aa2af

You are awesome, thank you!
diff --git a/arch/riscv/include/asm/arch_hweight.h b/arch/riscv/include/asm/arch_hweight.h
new file mode 100644
index 000000000000..c20236a0725b
--- /dev/null
+++ b/arch/riscv/include/asm/arch_hweight.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Based on arch/x86/include/asm/arch_hweight.h
+ */
+
+#ifndef _ASM_RISCV_HWEIGHT_H
+#define _ASM_RISCV_HWEIGHT_H
+
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
+
+#if (BITS_PER_LONG == 64)
+#define CPOPW	"cpopw "
+#elif (BITS_PER_LONG == 32)
+#define CPOPW	"cpop "
+#else
+#error "Unexpected BITS_PER_LONG"
+#endif
+
+static __always_inline unsigned int __arch_hweight32(unsigned int w)
+{
+#ifdef CONFIG_RISCV_ISA_ZBB
+	asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+				      RISCV_ISA_EXT_ZBB, 1)
+			  : : : : legacy);
+
+	asm (".option push\n"
+	     ".option arch,+zbb\n"
+	     CPOPW "%0, %0\n"
+	     ".option pop\n"
+	     : "+r" (w) : :);
+
+	return w;
+
+legacy:
+#endif
+	return __sw_hweight32(w);
+}
+
+static inline unsigned int __arch_hweight16(unsigned int w)
+{
+	return __arch_hweight32(w & 0xffff);
+}
+
+static inline unsigned int __arch_hweight8(unsigned int w)
+{
+	return __arch_hweight32(w & 0xff);
+}
+
+#if BITS_PER_LONG == 64
+static __always_inline unsigned long __arch_hweight64(__u64 w)
+{
+# ifdef CONFIG_RISCV_ISA_ZBB
+	asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+				      RISCV_ISA_EXT_ZBB, 1)
+			  : : : : legacy);
+
+	asm (".option push\n"
+	     ".option arch,+zbb\n"
+	     "cpop %0, %0\n"
+	     ".option pop\n"
+	     : "+r" (w) : :);
+
+	return w;
+
+legacy:
+# endif
+	return __sw_hweight64(w);
+}
+#else /* BITS_PER_LONG == 64 */
+static inline unsigned long __arch_hweight64(__u64 w)
+{
+	return __arch_hweight32((u32)w) +
+	       __arch_hweight32((u32)(w >> 32));
+}
+#endif /* !(BITS_PER_LONG == 64) */
+
+#endif /* _ASM_RISCV_HWEIGHT_H */
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index b212c2708cda..f7c167646460 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -271,7 +271,9 @@ static __always_inline int variable_fls(unsigned int x)
 #include <asm-generic/bitops/fls64.h>
 #include <asm-generic/bitops/sched.h>
 
-#include <asm-generic/bitops/hweight.h>
+#include <asm/arch_hweight.h>
+
+#include <asm-generic/bitops/const_hweight.h>
 
 #if (BITS_PER_LONG == 64)
 #define __AMO(op) "amo" #op ".d"
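When the Zbb alternative is not applied, the functions above fall back to the generic __sw_hweight32()/__sw_hweight64() routines from lib/hweight.c. The snippet below is a minimal userspace sketch of the classic parallel ("SWAR") bit-count that this kind of fallback is built on; sw_popcount32 and the test harness are illustrative names, not kernel symbols.

```c
/*
 * Userspace sketch of a parallel popcount, the style of algorithm
 * behind the generic software fallback. Illustration only; this is
 * not the kernel implementation.
 */
#include <assert.h>
#include <inttypes.h>
#include <stddef.h>
#include <stdio.h>

static unsigned int sw_popcount32(uint32_t w)
{
	w = w - ((w >> 1) & 0x55555555);                /* 2-bit sums */
	w = (w & 0x33333333) + ((w >> 2) & 0x33333333); /* 4-bit sums */
	w = (w + (w >> 4)) & 0x0f0f0f0f;                /* per-byte sums */
	return (w * 0x01010101) >> 24;                  /* total in top byte */
}

int main(void)
{
	uint32_t samples[] = { 0, 1, 0x80000000, 0xdeadbeef, 0xffffffff };

	for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		/* Cross-check against the compiler builtin. */
		assert(sw_popcount32(samples[i]) ==
		       (unsigned int)__builtin_popcount(samples[i]));
		printf("hweight32(0x%08" PRIx32 ") = %u\n",
		       samples[i], sw_popcount32(samples[i]));
	}
	return 0;
}
```

Where Zbb is available, cpop/cpopw collapse this whole shift-and-mask sequence into a single instruction, which is the point of the patch.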
The Hamming Weight of a number is the total number of bits set in it, so
the cpop/cpopw instruction from Zbb extension can be used to accelerate
hweight() API.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
---
 arch/riscv/include/asm/arch_hweight.h | 78 +++++++++++++++++++++++++++
 arch/riscv/include/asm/bitops.h       |  4 +-
 2 files changed, 81 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/include/asm/arch_hweight.h
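The cpop path can also be exercised outside the kernel. Below is a minimal RV64-only sketch under these assumptions: a riscv64 toolchain whose assembler accepts `.option arch,+zbb`, and a CPU (or QEMU) that actually implements Zbb; `zbb_cpop` is a hypothetical name, not a kernel symbol. Unlike the patch, this sketch has no runtime fallback: in the kernel, the ALTERNATIVE() sequence patches the jump to the legacy label into a nop at boot only when Zbb is detected.

```c
/*
 * RV64-only sketch: count set bits with Zbb's cpop, mirroring the
 * inline-asm pattern used in the patch. Assumes assembler support for
 * ".option arch,+zbb" and Zbb-capable hardware; zbb_cpop is illustrative.
 */
#include <stdio.h>

static inline unsigned long zbb_cpop(unsigned long w)
{
	asm (".option push\n"
	     ".option arch,+zbb\n"
	     "cpop %0, %0\n"	/* w = number of set bits in w */
	     ".option pop\n"
	     : "+r" (w));
	return w;
}

int main(void)
{
	unsigned long v = 0xdeadbeefcafef00dUL;

	printf("cpop(0x%lx) = %lu\n", v, zbb_cpop(v)); /* prints 42 */
	return 0;
}
```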