Message ID | 20231214155721.1753-7-andy.chiu@sifive.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | riscv: support kernel-mode Vector | expand |
Context | Check | Description |
---|---|---|
conchuod/vmtest-for-next-PR | fail | PR summary |
conchuod/patch-6-test-1 | success | .github/scripts/patches/build_rv32_defconfig.sh |
conchuod/patch-6-test-2 | fail | .github/scripts/patches/build_rv64_clang_allmodconfig.sh |
conchuod/patch-6-test-3 | fail | .github/scripts/patches/build_rv64_gcc_allmodconfig.sh |
conchuod/patch-6-test-4 | fail | .github/scripts/patches/build_rv64_nommu_k210_defconfig.sh |
conchuod/patch-6-test-5 | fail | .github/scripts/patches/build_rv64_nommu_virt_defconfig.sh |
conchuod/patch-6-test-6 | fail | .github/scripts/patches/checkpatch.sh |
conchuod/patch-6-test-7 | success | .github/scripts/patches/dtb_warn_rv64.sh |
conchuod/patch-6-test-8 | success | .github/scripts/patches/header_inline.sh |
conchuod/patch-6-test-9 | success | .github/scripts/patches/kdoc.sh |
conchuod/patch-6-test-10 | success | .github/scripts/patches/module_param.sh |
conchuod/patch-6-test-11 | success | .github/scripts/patches/verify_fixes.sh |
conchuod/patch-6-test-12 | success | .github/scripts/patches/verify_signedoff.sh |
On Thu, Dec 14, 2023 at 03:57:21PM +0000, Andy Chiu wrote: > Provide vectorized memcpy/memset/memmove to accelerate common memory > operations. Also, group them into V_OPT_TEMPLATE3 macro because their > setup/tear-down and fallback logics are the same. > > The original implementation of Vector operations comes from > https://github.com/sifive/sifive-libc, which we agree to contribute to > Linux kernel. > > Signed-off-by: Andy Chiu <andy.chiu@sifive.com> > --- > Changelog v4: > - new patch since v4 > --- > arch/riscv/lib/Makefile | 3 ++ > arch/riscv/lib/memcpy_vector.S | 29 +++++++++++++++++++ > arch/riscv/lib/memmove_vector.S | 49 ++++++++++++++++++++++++++++++++ > arch/riscv/lib/memset_vector.S | 33 +++++++++++++++++++++ > arch/riscv/lib/riscv_v_helpers.c | 21 ++++++++++++++ > 5 files changed, 135 insertions(+) > create mode 100644 arch/riscv/lib/memcpy_vector.S > create mode 100644 arch/riscv/lib/memmove_vector.S > create mode 100644 arch/riscv/lib/memset_vector.S > > diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile > index 1fe8d797e0f2..3111863afd2e 100644 > --- a/arch/riscv/lib/Makefile > +++ b/arch/riscv/lib/Makefile > @@ -14,3 +14,6 @@ obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o > lib-$(CONFIG_RISCV_ISA_V) += xor.o > lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o > lib-$(CONFIG_RISCV_ISA_V) += uaccess_vector.o > +lib-$(CONFIG_RISCV_ISA_V) += memset_vector.o > +lib-$(CONFIG_RISCV_ISA_V) += memcpy_vector.o > +lib-$(CONFIG_RISCV_ISA_V) += memmove_vector.o > diff --git a/arch/riscv/lib/memcpy_vector.S b/arch/riscv/lib/memcpy_vector.S > new file mode 100644 > index 000000000000..4176b6e0a53c > --- /dev/null > +++ b/arch/riscv/lib/memcpy_vector.S > @@ -0,0 +1,29 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > + > +#include <linux/linkage.h> > +#include <asm/asm.h> > + > +#define pDst a0 > +#define pSrc a1 > +#define iNum a2 > + > +#define iVL a3 > +#define pDstPtr a4 > + > +#define ELEM_LMUL_SETTING m8 > +#define vData v0 > + > + > 
+/* void *memcpy(void *, const void *, size_t) */ > +SYM_FUNC_START(__asm_memcpy_vector) > + mv pDstPtr, pDst > +loop: > + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma > + vle8.v vData, (pSrc) > + sub iNum, iNum, iVL > + add pSrc, pSrc, iVL > + vse8.v vData, (pDstPtr) > + add pDstPtr, pDstPtr, iVL > + bnez iNum, loop > + ret > +SYM_FUNC_END(__asm_memcpy_vector) > diff --git a/arch/riscv/lib/memmove_vector.S b/arch/riscv/lib/memmove_vector.S > new file mode 100644 > index 000000000000..4cea9d244dc9 > --- /dev/null > +++ b/arch/riscv/lib/memmove_vector.S > @@ -0,0 +1,49 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > +#include <linux/linkage.h> > +#include <asm/asm.h> > + > +#define pDst a0 > +#define pSrc a1 > +#define iNum a2 > + > +#define iVL a3 > +#define pDstPtr a4 > +#define pSrcBackwardPtr a5 > +#define pDstBackwardPtr a6 > + > +#define ELEM_LMUL_SETTING m8 > +#define vData v0 > + > +SYM_FUNC_START(__asm_memmove_vector) > + > + mv pDstPtr, pDst > + > + bgeu pSrc, pDst, forward_copy_loop > + add pSrcBackwardPtr, pSrc, iNum > + add pDstBackwardPtr, pDst, iNum > + bltu pDst, pSrcBackwardPtr, backward_copy_loop > + > +forward_copy_loop: > + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma > + > + vle8.v vData, (pSrc) > + sub iNum, iNum, iVL > + add pSrc, pSrc, iVL > + vse8.v vData, (pDstPtr) > + add pDstPtr, pDstPtr, iVL > + > + bnez iNum, forward_copy_loop > + ret > + > +backward_copy_loop: > + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma > + > + sub pSrcBackwardPtr, pSrcBackwardPtr, iVL > + vle8.v vData, (pSrcBackwardPtr) > + sub iNum, iNum, iVL > + sub pDstBackwardPtr, pDstBackwardPtr, iVL > + vse8.v vData, (pDstBackwardPtr) > + bnez iNum, backward_copy_loop > + ret > + > +SYM_FUNC_END(__asm_memmove_vector) > diff --git a/arch/riscv/lib/memset_vector.S b/arch/riscv/lib/memset_vector.S > new file mode 100644 > index 000000000000..4611feed72ac > --- /dev/null > +++ b/arch/riscv/lib/memset_vector.S > @@ -0,0 +1,33 @@ > +/* SPDX-License-Identifier: 
GPL-2.0-only */ > +#include <linux/linkage.h> > +#include <asm/asm.h> > + > +#define pDst a0 > +#define iValue a1 > +#define iNum a2 > + > +#define iVL a3 > +#define iTemp a4 > +#define pDstPtr a5 > + > +#define ELEM_LMUL_SETTING m8 > +#define vData v0 > + > +/* void *memset(void *, int, size_t) */ > +SYM_FUNC_START(__asm_memset_vector) > + > + mv pDstPtr, pDst > + > + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma > + vmv.v.x vData, iValue > + > +loop: > + vse8.v vData, (pDstPtr) > + sub iNum, iNum, iVL > + add pDstPtr, pDstPtr, iVL > + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma > + bnez iNum, loop > + > + ret > + > +SYM_FUNC_END(__asm_memset_vector) > diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c > index d763b9c69fb7..12e8c5deb013 100644 > --- a/arch/riscv/lib/riscv_v_helpers.c > +++ b/arch/riscv/lib/riscv_v_helpers.c > @@ -36,3 +36,24 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n) > fallback: > return fallback_scalar_usercopy(dst, src, n); > } > + > +#define V_OPT_TEMPLATE3(prefix, type_r, type_0, type_1) \ > +extern type_r __asm_##prefix##_vector(type_0, type_1, size_t n); \ > +type_r prefix(type_0 a0, type_1 a1, size_t n) \ > +{ \ > + type_r ret; \ > + if (has_vector() && may_use_simd() && n > riscv_v_##prefix##_thres) { \ I forgot to bring it up on the other patch, but the phrase "thres" is not intuitive to me. I think spelling threshold out is better, or using "thresh" instead would make this much more clear. > + kernel_vector_begin(); \ > + ret = __asm_##prefix##_vector(a0, a1, n); \ > + kernel_vector_end(); \ > + return ret; \ > + } \ > + return __##prefix(a0, a1, n); \ > +} > + > +static size_t riscv_v_memset_thres = 1280; > +V_OPT_TEMPLATE3(memset, void *, void*, int) > +static size_t riscv_v_memcpy_thres = 768; > +V_OPT_TEMPLATE3(memcpy, void *, void*, const void *) > +static size_t riscv_v_memmove_thres = 512; How were these values selected? 
I would imagine that the best thresholds differ between vector hardware implementations, so it might be valuable to keep these as the default values but allow a Kconfig option to change them. - Charlie > +V_OPT_TEMPLATE3(memmove, void *, void*, const void *) > -- > 2.17.1 >
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 1fe8d797e0f2..3111863afd2e 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -14,3 +14,6 @@ obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_RISCV_ISA_V) += xor.o lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o lib-$(CONFIG_RISCV_ISA_V) += uaccess_vector.o +lib-$(CONFIG_RISCV_ISA_V) += memset_vector.o +lib-$(CONFIG_RISCV_ISA_V) += memcpy_vector.o +lib-$(CONFIG_RISCV_ISA_V) += memmove_vector.o diff --git a/arch/riscv/lib/memcpy_vector.S b/arch/riscv/lib/memcpy_vector.S new file mode 100644 index 000000000000..4176b6e0a53c --- /dev/null +++ b/arch/riscv/lib/memcpy_vector.S @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include <linux/linkage.h> +#include <asm/asm.h> + +#define pDst a0 +#define pSrc a1 +#define iNum a2 + +#define iVL a3 +#define pDstPtr a4 + +#define ELEM_LMUL_SETTING m8 +#define vData v0 + + +/* void *memcpy(void *, const void *, size_t) */ +SYM_FUNC_START(__asm_memcpy_vector) + mv pDstPtr, pDst +loop: + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma + vle8.v vData, (pSrc) + sub iNum, iNum, iVL + add pSrc, pSrc, iVL + vse8.v vData, (pDstPtr) + add pDstPtr, pDstPtr, iVL + bnez iNum, loop + ret +SYM_FUNC_END(__asm_memcpy_vector) diff --git a/arch/riscv/lib/memmove_vector.S b/arch/riscv/lib/memmove_vector.S new file mode 100644 index 000000000000..4cea9d244dc9 --- /dev/null +++ b/arch/riscv/lib/memmove_vector.S @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include <linux/linkage.h> +#include <asm/asm.h> + +#define pDst a0 +#define pSrc a1 +#define iNum a2 + +#define iVL a3 +#define pDstPtr a4 +#define pSrcBackwardPtr a5 +#define pDstBackwardPtr a6 + +#define ELEM_LMUL_SETTING m8 +#define vData v0 + +SYM_FUNC_START(__asm_memmove_vector) + + mv pDstPtr, pDst + + bgeu pSrc, pDst, forward_copy_loop + add pSrcBackwardPtr, pSrc, iNum + add pDstBackwardPtr, pDst, iNum + bltu pDst, pSrcBackwardPtr, 
backward_copy_loop + +forward_copy_loop: + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma + + vle8.v vData, (pSrc) + sub iNum, iNum, iVL + add pSrc, pSrc, iVL + vse8.v vData, (pDstPtr) + add pDstPtr, pDstPtr, iVL + + bnez iNum, forward_copy_loop + ret + +backward_copy_loop: + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma + + sub pSrcBackwardPtr, pSrcBackwardPtr, iVL + vle8.v vData, (pSrcBackwardPtr) + sub iNum, iNum, iVL + sub pDstBackwardPtr, pDstBackwardPtr, iVL + vse8.v vData, (pDstBackwardPtr) + bnez iNum, backward_copy_loop + ret + +SYM_FUNC_END(__asm_memmove_vector) diff --git a/arch/riscv/lib/memset_vector.S b/arch/riscv/lib/memset_vector.S new file mode 100644 index 000000000000..4611feed72ac --- /dev/null +++ b/arch/riscv/lib/memset_vector.S @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include <linux/linkage.h> +#include <asm/asm.h> + +#define pDst a0 +#define iValue a1 +#define iNum a2 + +#define iVL a3 +#define iTemp a4 +#define pDstPtr a5 + +#define ELEM_LMUL_SETTING m8 +#define vData v0 + +/* void *memset(void *, int, size_t) */ +SYM_FUNC_START(__asm_memset_vector) + + mv pDstPtr, pDst + + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma + vmv.v.x vData, iValue + +loop: + vse8.v vData, (pDstPtr) + sub iNum, iNum, iVL + add pDstPtr, pDstPtr, iVL + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma + bnez iNum, loop + + ret + +SYM_FUNC_END(__asm_memset_vector) diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c index d763b9c69fb7..12e8c5deb013 100644 --- a/arch/riscv/lib/riscv_v_helpers.c +++ b/arch/riscv/lib/riscv_v_helpers.c @@ -36,3 +36,24 @@ asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n) fallback: return fallback_scalar_usercopy(dst, src, n); } + +#define V_OPT_TEMPLATE3(prefix, type_r, type_0, type_1) \ +extern type_r __asm_##prefix##_vector(type_0, type_1, size_t n); \ +type_r prefix(type_0 a0, type_1 a1, size_t n) \ +{ \ + type_r ret; \ + if (has_vector() && may_use_simd() && 
n > riscv_v_##prefix##_thres) { \ + kernel_vector_begin(); \ + ret = __asm_##prefix##_vector(a0, a1, n); \ + kernel_vector_end(); \ + return ret; \ + } \ + return __##prefix(a0, a1, n); \ +} + +static size_t riscv_v_memset_thres = 1280; +V_OPT_TEMPLATE3(memset, void *, void*, int) +static size_t riscv_v_memcpy_thres = 768; +V_OPT_TEMPLATE3(memcpy, void *, void*, const void *) +static size_t riscv_v_memmove_thres = 512; +V_OPT_TEMPLATE3(memmove, void *, void*, const void *)
Provide vectorized memcpy/memset/memmove to accelerate common memory operations. Also, group them into the V_OPT_TEMPLATE3 macro because their setup/tear-down and fallback logic is the same. The original implementation of the Vector operations comes from https://github.com/sifive/sifive-libc, which we agree to contribute to the Linux kernel. Signed-off-by: Andy Chiu <andy.chiu@sifive.com> --- Changelog v4: - new patch since v4 --- arch/riscv/lib/Makefile | 3 ++ arch/riscv/lib/memcpy_vector.S | 29 +++++++++++++++++++ arch/riscv/lib/memmove_vector.S | 49 ++++++++++++++++++++++++++++++++ arch/riscv/lib/memset_vector.S | 33 +++++++++++++++++++++ arch/riscv/lib/riscv_v_helpers.c | 21 ++++++++++++++ 5 files changed, 135 insertions(+) create mode 100644 arch/riscv/lib/memcpy_vector.S create mode 100644 arch/riscv/lib/memmove_vector.S create mode 100644 arch/riscv/lib/memset_vector.S