Message ID | 20231223042914.18599-7-andy.chiu@sifive.com (mailing list archive) |
---|---|
State | Superseded |
Series | riscv: support kernel-mode Vector |

On Sat, Dec 23, 2023 at 04:29:10AM +0000, Andy Chiu wrote:
> Provide vectorized memcpy/memset/memmove to accelerate common memory
> operations. Also, group them into V_OPT_TEMPLATE3 macro because their
> setup/tear-down and fallback logics are the same.
>
> The optimal size for the kernel to preference Vector over scalar,
> riscv_v_mem*_threshold, is only a heuristic for now. We can add DT
> parsing if people feel the need of customizing it.
>
> The original implementation of Vector operations comes from
> https://github.com/sifive/sifive-libc, which we agree to contribute to
> Linux kernel.
>
> Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
> ---
> Changelog v7:
>  - add __NO_FORTIFY to prevent conflicting function declaration with
>    macro for mem* functions.
> Changelog v6:
>  - provide kconfig to set threshold for vectorized functions (Charlie)
>  - rename *thres to *threshold (Charlie)
> Changelog v4:
>  - new patch since v4
> ---
>  arch/riscv/Kconfig               | 24 ++++++++++++++++
>  arch/riscv/lib/Makefile          |  3 ++
>  arch/riscv/lib/memcpy_vector.S   | 29 +++++++++++++++++++
>  arch/riscv/lib/memmove_vector.S  | 49 ++++++++++++++++++++++++++++++++
>  arch/riscv/lib/memset_vector.S   | 33 +++++++++++++++++++++
>  arch/riscv/lib/riscv_v_helpers.c | 26 +++++++++++++++++
>  6 files changed, 164 insertions(+)
>  create mode 100644 arch/riscv/lib/memcpy_vector.S
>  create mode 100644 arch/riscv/lib/memmove_vector.S
>  create mode 100644 arch/riscv/lib/memset_vector.S
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 3c5ba05e8a2d..cba53dcc2ae0 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -533,6 +533,30 @@ config RISCV_ISA_V_UCOPY_THRESHOLD
>  	  Prefer using vectorized copy_to_user()/copy_from_user() when the
>  	  workload size exceeds this value.
>
> +config RISCV_ISA_V_MEMSET_THRESHOLD
> +	int "Threshold size for vectorized memset()"
> +	depends on RISCV_ISA_V
> +	default 1280
> +	help
> +	  Prefer using vectorized memset() when the workload size exceeds this
> +	  value.
> +
> +config RISCV_ISA_V_MEMCPY_THRESHOLD
> +	int "Threshold size for vectorized memcpy()"
> +	depends on RISCV_ISA_V
> +	default 768
> +	help
> +	  Prefer using vectorized memcpy() when the workload size exceeds this
> +	  value.
> +
> +config RISCV_ISA_V_MEMMOVE_THRESHOLD
> +	int "Threshold size for vectorized memmove()"
> +	depends on RISCV_ISA_V
> +	default 512
> +	help
> +	  Prefer using vectorized memmove() when the workload size exceeds this
> +	  value.
> +
>  config TOOLCHAIN_HAS_ZBB
>  	bool
>  	default y
> diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
> index c8a6787d5827..d389dbf285fe 100644
> --- a/arch/riscv/lib/Makefile
> +++ b/arch/riscv/lib/Makefile
> @@ -16,3 +16,6 @@ lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
>  obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
>  lib-$(CONFIG_RISCV_ISA_V)	+= xor.o
>  lib-$(CONFIG_RISCV_ISA_V)	+= riscv_v_helpers.o
> +lib-$(CONFIG_RISCV_ISA_V)	+= memset_vector.o
> +lib-$(CONFIG_RISCV_ISA_V)	+= memcpy_vector.o
> +lib-$(CONFIG_RISCV_ISA_V)	+= memmove_vector.o
> diff --git a/arch/riscv/lib/memcpy_vector.S b/arch/riscv/lib/memcpy_vector.S
> new file mode 100644
> index 000000000000..4176b6e0a53c
> --- /dev/null
> +++ b/arch/riscv/lib/memcpy_vector.S
> @@ -0,0 +1,29 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +
> +#include <linux/linkage.h>
> +#include <asm/asm.h>
> +
> +#define pDst a0
> +#define pSrc a1
> +#define iNum a2
> +
> +#define iVL a3
> +#define pDstPtr a4
> +
> +#define ELEM_LMUL_SETTING m8
> +#define vData v0
> +
> +
> +/* void *memcpy(void *, const void *, size_t) */
> +SYM_FUNC_START(__asm_memcpy_vector)
> +	mv pDstPtr, pDst
> +loop:
> +	vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +	vle8.v vData, (pSrc)
> +	sub iNum, iNum, iVL
> +	add pSrc, pSrc, iVL
> +	vse8.v vData, (pDstPtr)
> +	add pDstPtr, pDstPtr, iVL
> +	bnez iNum, loop
> +	ret
> +SYM_FUNC_END(__asm_memcpy_vector)
> diff --git a/arch/riscv/lib/memmove_vector.S b/arch/riscv/lib/memmove_vector.S
> new file mode 100644
> index 000000000000..4cea9d244dc9
> --- /dev/null
> +++ b/arch/riscv/lib/memmove_vector.S
> @@ -0,0 +1,49 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +#include <linux/linkage.h>
> +#include <asm/asm.h>
> +
> +#define pDst a0
> +#define pSrc a1
> +#define iNum a2
> +
> +#define iVL a3
> +#define pDstPtr a4
> +#define pSrcBackwardPtr a5
> +#define pDstBackwardPtr a6
> +
> +#define ELEM_LMUL_SETTING m8
> +#define vData v0
> +
> +SYM_FUNC_START(__asm_memmove_vector)
> +
> +	mv pDstPtr, pDst
> +
> +	bgeu pSrc, pDst, forward_copy_loop
> +	add pSrcBackwardPtr, pSrc, iNum
> +	add pDstBackwardPtr, pDst, iNum
> +	bltu pDst, pSrcBackwardPtr, backward_copy_loop
> +
> +forward_copy_loop:
> +	vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +
> +	vle8.v vData, (pSrc)
> +	sub iNum, iNum, iVL
> +	add pSrc, pSrc, iVL
> +	vse8.v vData, (pDstPtr)
> +	add pDstPtr, pDstPtr, iVL
> +
> +	bnez iNum, forward_copy_loop
> +	ret
> +
> +backward_copy_loop:
> +	vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +
> +	sub pSrcBackwardPtr, pSrcBackwardPtr, iVL
> +	vle8.v vData, (pSrcBackwardPtr)
> +	sub iNum, iNum, iVL
> +	sub pDstBackwardPtr, pDstBackwardPtr, iVL
> +	vse8.v vData, (pDstBackwardPtr)
> +	bnez iNum, backward_copy_loop
> +	ret
> +
> +SYM_FUNC_END(__asm_memmove_vector)
> diff --git a/arch/riscv/lib/memset_vector.S b/arch/riscv/lib/memset_vector.S
> new file mode 100644
> index 000000000000..4611feed72ac
> --- /dev/null
> +++ b/arch/riscv/lib/memset_vector.S
> @@ -0,0 +1,33 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +#include <linux/linkage.h>
> +#include <asm/asm.h>
> +
> +#define pDst a0
> +#define iValue a1
> +#define iNum a2
> +
> +#define iVL a3
> +#define iTemp a4
> +#define pDstPtr a5
> +
> +#define ELEM_LMUL_SETTING m8
> +#define vData v0
> +
> +/* void *memset(void *, int, size_t) */
> +SYM_FUNC_START(__asm_memset_vector)
> +
> +	mv pDstPtr, pDst
> +
> +	vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +	vmv.v.x vData, iValue
> +
> +loop:
> +	vse8.v vData, (pDstPtr)
> +	sub iNum, iNum, iVL
> +	add pDstPtr, pDstPtr, iVL
> +	vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma
> +	bnez iNum, loop
> +
> +	ret
> +
> +SYM_FUNC_END(__asm_memset_vector)
> diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c
> index 6cac8f4e69e9..c62f333ba557 100644
> --- a/arch/riscv/lib/riscv_v_helpers.c
> +++ b/arch/riscv/lib/riscv_v_helpers.c
> @@ -3,9 +3,13 @@
>   * Copyright (C) 2023 SiFive
>   * Author: Andy Chiu <andy.chiu@sifive.com>
>   */
> +#ifndef __NO_FORTIFY
> +# define __NO_FORTIFY
> +#endif
>  #include <linux/linkage.h>
>  #include <asm/asm.h>
>
> +#include <asm/string.h>
>  #include <asm/vector.h>
>  #include <asm/simd.h>
>
> @@ -42,3 +46,25 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n)
>  	return fallback_scalar_usercopy(dst, src, n);
>  }
>  #endif
> +
> +#define V_OPT_TEMPLATE3(prefix, type_r, type_0, type_1) \
> +extern type_r __asm_##prefix##_vector(type_0, type_1, size_t n); \
> +type_r prefix(type_0 a0, type_1 a1, size_t n) \
> +{ \
> +	type_r ret; \
> +	if (has_vector() && may_use_simd() && \
> +	    n > riscv_v_##prefix##_threshold) { \
> +		kernel_vector_begin(); \
> +		ret = __asm_##prefix##_vector(a0, a1, n); \
> +		kernel_vector_end(); \
> +		return ret; \
> +	} \
> +	return __##prefix(a0, a1, n); \
> +}
> +
> +static size_t riscv_v_memset_threshold = CONFIG_RISCV_ISA_V_MEMSET_THRESHOLD;
> +V_OPT_TEMPLATE3(memset, void *, void*, int)
> +static size_t riscv_v_memcpy_threshold = CONFIG_RISCV_ISA_V_MEMCPY_THRESHOLD;
> +V_OPT_TEMPLATE3(memcpy, void *, void*, const void *)
> +static size_t riscv_v_memmove_threshold = CONFIG_RISCV_ISA_V_MEMMOVE_THRESHOLD;
> +V_OPT_TEMPLATE3(memmove, void *, void*, const void *)
> --
> 2.17.1
>

Thank you for adding the kconfigs for the thresholds.

Reviewed-by: Charlie Jenkins <charlie@rivosinc.com>
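
For readers who prefer plain C over the macro, the V_OPT_TEMPLATE3(memcpy, ...) invocation in the patch above expands to roughly the following. Every helper referenced here (has_vector(), may_use_simd(), kernel_vector_begin()/kernel_vector_end(), riscv_v_memcpy_threshold, __asm_memcpy_vector() and the scalar __memcpy() fallback) comes from this patch and the rest of the series; the sketch only spells out the expansion and is not additional code.

```c
/*
 * Approximate expansion of V_OPT_TEMPLATE3(memcpy, void *, void *, const void *)
 * from riscv_v_helpers.c in the patch above, written out for readability.
 */
extern void *__asm_memcpy_vector(void *, const void *, size_t n);

void *memcpy(void *a0, const void *a1, size_t n)
{
	void *ret;

	/* Use Vector only when it is usable here and the copy is large enough. */
	if (has_vector() && may_use_simd() && n > riscv_v_memcpy_threshold) {
		kernel_vector_begin();
		ret = __asm_memcpy_vector(a0, a1, n);
		kernel_vector_end();
		return ret;
	}

	/* Otherwise fall back to the scalar implementation. */
	return __memcpy(a0, a1, n);
}
```

memset() and memmove() expand to the same shape; only the threshold variable and the underlying routines differ.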
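All three assembly routines use the same strip-mining pattern: vsetvli asks the hardware how many bytes it will process this pass, the unit-stride load/store moves that chunk through v0..v7 (e8 elements, LMUL=8), and the pointers and remaining count advance until iNum reaches zero. The following is a minimal C model of that loop, for illustration only; chunk_size() is a made-up stand-in for vsetvli and the byte loop stands in for vle8.v/vse8.v, none of it is code from the patch.

```c
#include <stddef.h>

/* Stand-in for vsetvli: pretend the hardware accepts up to 64 bytes per pass. */
static size_t chunk_size(size_t remaining)
{
	const size_t vlmax = 64;	/* hypothetical VLEN/8 * LMUL budget */
	return remaining < vlmax ? remaining : vlmax;
}

/* C model of the loop in memcpy_vector.S; comments map back to the assembly. */
void *memcpy_strip_mine(void *dst, const void *src, size_t n)
{
	unsigned char *d = dst;
	const unsigned char *s = src;

	while (n) {
		size_t vl = chunk_size(n);	/* vsetvli iVL, iNum, e8, m8, ta, ma */

		for (size_t i = 0; i < vl; i++)	/* vle8.v vData, (pSrc)    */
			d[i] = s[i];		/* vse8.v vData, (pDstPtr) */

		s += vl;			/* add pSrc, pSrc, iVL       */
		d += vl;			/* add pDstPtr, pDstPtr, iVL */
		n -= vl;			/* sub iNum, iNum, iVL       */
	}
	return dst;
}
```

memmove_vector.S runs the same loop either forward or, when the destination lies inside the source range, backward from the end of both buffers, which is what the bgeu/bltu checks at its entry decide.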