Message ID | 20230715150032.6917-4-andy.chiu@sifive.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | riscv: support kernel-mode Vector | expand |
Context | Check | Description |
---|---|---|
conchuod/cover_letter | success | Series has a cover letter |
conchuod/tree_selection | success | Guessed tree name to be for-next at HEAD 471aba2e4760 |
conchuod/fixes_present | success | Fixes tag not required for -next series |
conchuod/maintainers_pattern | success | MAINTAINERS pattern errors before the patch: 4 and now 4 |
conchuod/verify_signedoff | success | Signed-off-by tag matches author and committer |
conchuod/kdoc | success | Errors and warnings before: 0 this patch: 0 |
conchuod/build_rv64_clang_allmodconfig | success | Errors and warnings before: 9 this patch: 9 |
conchuod/module_param | success | Was 0 now: 0 |
conchuod/build_rv64_gcc_allmodconfig | success | Errors and warnings before: 9 this patch: 9 |
conchuod/build_rv32_defconfig | success | Build OK |
conchuod/dtb_warn_rv64 | success | Errors and warnings before: 3 this patch: 3 |
conchuod/header_inline | fail | Detected static functions without inline keyword in header files: 1 |
conchuod/checkpatch | warning | WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? |
conchuod/build_rv64_nommu_k210_defconfig | success | Build OK |
conchuod/verify_fixes | success | No Fixes tag |
conchuod/build_rv64_nommu_virt_defconfig | success | Build OK |
On Sat, Jul 15, 2023 at 03:00:29PM +0000, Andy Chiu wrote: > From: Greentime Hu <greentime.hu@sifive.com> > > This patch adds support for vector optimized XOR and it is tested in > qemu. Since this patch was originally written, has it been tested in hardware? > Co-developed-by: Han-Kuan Chen <hankuan.chen@sifive.com> > Signed-off-by: Han-Kuan Chen <hankuan.chen@sifive.com> > Signed-off-by: Greentime Hu <greentime.hu@sifive.com> > Signed-off-by: Andy Chiu <andy.chiu@sifive.com> > --- > arch/riscv/include/asm/xor.h | 82 ++++++++++++++++++++++++++++++++++++ > arch/riscv/lib/Makefile | 1 + > arch/riscv/lib/xor.S | 81 +++++++++++++++++++++++++++++++++++ > 3 files changed, 164 insertions(+) > create mode 100644 arch/riscv/include/asm/xor.h > create mode 100644 arch/riscv/lib/xor.S > > diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h > new file mode 100644 > index 000000000000..81b8837fa161 > --- /dev/null > +++ b/arch/riscv/include/asm/xor.h > +static void xor_rvv_2(unsigned long bytes, unsigned long *__restrict p1, > + const unsigned long *__restrict p2) > +static void xor_rvv_3(unsigned long bytes, unsigned long *__restrict p1, > + const unsigned long *__restrict p2, > + const unsigned long *__restrict p3) > +static void xor_rvv_4(unsigned long bytes, unsigned long *__restrict p1, > + const unsigned long *__restrict p2, > + const unsigned long *__restrict p3, > + const unsigned long *__restrict p4) > + > +static void xor_rvv_5(unsigned long bytes, unsigned long *__restrict p1, > + const unsigned long *__restrict p2, > + const unsigned long *__restrict p3, > + const unsigned long *__restrict p4, > + const unsigned long *__restrict p5) > + > +static struct xor_block_template xor_block_rvv = { > + .name = "rvv", > + .do_2 = xor_rvv_2, > + .do_3 = xor_rvv_3, > + .do_4 = xor_rvv_4, > + .do_5 = xor_rvv_5 > +}; Same naming scheme comments as the main vector patchset and 2/6 apply here too.
On Mon, Jul 17, 2023 at 6:26 PM Conor Dooley <conor.dooley@microchip.com> wrote: > > On Sat, Jul 15, 2023 at 03:00:29PM +0000, Andy Chiu wrote: > > From: Greentime Hu <greentime.hu@sifive.com> > > > > This patch adds support for vector optimized XOR and it is tested in > > qemu. > > Since this patch was originally written, has it been tested in hardware? We've run it on internal FPGAs but FPGAs don't count, right? ;) > > > Co-developed-by: Han-Kuan Chen <hankuan.chen@sifive.com> > > Signed-off-by: Han-Kuan Chen <hankuan.chen@sifive.com> > > Signed-off-by: Greentime Hu <greentime.hu@sifive.com> > > Signed-off-by: Andy Chiu <andy.chiu@sifive.com> > > --- > > arch/riscv/include/asm/xor.h | 82 ++++++++++++++++++++++++++++++++++++ > > arch/riscv/lib/Makefile | 1 + > > arch/riscv/lib/xor.S | 81 +++++++++++++++++++++++++++++++++++ > > 3 files changed, 164 insertions(+) > > create mode 100644 arch/riscv/include/asm/xor.h > > create mode 100644 arch/riscv/lib/xor.S > > > > diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h > > new file mode 100644 > > index 000000000000..81b8837fa161 > > --- /dev/null > > +++ b/arch/riscv/include/asm/xor.h > > > +static void xor_rvv_2(unsigned long bytes, unsigned long *__restrict p1, > > + const unsigned long *__restrict p2) > > > +static void xor_rvv_3(unsigned long bytes, unsigned long *__restrict p1, > > + const unsigned long *__restrict p2, > > + const unsigned long *__restrict p3) > > > +static void xor_rvv_4(unsigned long bytes, unsigned long *__restrict p1, > > + const unsigned long *__restrict p2, > > + const unsigned long *__restrict p3, > > + const unsigned long *__restrict p4) > > > + > > +static void xor_rvv_5(unsigned long bytes, unsigned long *__restrict p1, > > + const unsigned long *__restrict p2, > > + const unsigned long *__restrict p3, > > + const unsigned long *__restrict p4, > > + const unsigned long *__restrict p5) > > > + > > +static struct xor_block_template xor_block_rvv = { > > + .name = 
"rvv", > > + .do_2 = xor_rvv_2, > > + .do_3 = xor_rvv_3, > > + .do_4 = xor_rvv_4, > > + .do_5 = xor_rvv_5 > > +}; > > Same naming scheme comments as the main vector patchset and 2/6 apply > here too. Yep, I'm doing s/xor_rvv/xor_vector Thanks, Andy
diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h new file mode 100644 index 000000000000..81b8837fa161 --- /dev/null +++ b/arch/riscv/include/asm/xor.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2021 SiFive + */ + +#include <linux/hardirq.h> +#include <asm-generic/xor.h> +#ifdef CONFIG_RISCV_ISA_V +#include <asm/vector.h> +#include <asm/switch_to.h> +/* Vector XOR loops from arch/riscv/lib/xor.S: p1 ^= p2 ^ ... over bytes. */ +void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2); +void xor_regs_3_(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3); +void xor_regs_4_(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3, + const unsigned long *__restrict p4); +void xor_regs_5_(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3, + const unsigned long *__restrict p4, + const unsigned long *__restrict p5); +/* Each wrapper runs its loop between kernel_rvv_begin() and _end(). */ +static void xor_rvv_2(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2) +{ + kernel_rvv_begin(); + xor_regs_2_(bytes, p1, p2); + kernel_rvv_end(); +} + +static void xor_rvv_3(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3) +{ + kernel_rvv_begin(); + xor_regs_3_(bytes, p1, p2, p3); + kernel_rvv_end(); +} + +static void xor_rvv_4(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3, + const unsigned long *__restrict p4) +{ + kernel_rvv_begin(); + xor_regs_4_(bytes, p1, p2, p3, p4); + kernel_rvv_end(); +} + +static void xor_rvv_5(unsigned long bytes, unsigned long *__restrict p1, + const unsigned long *__restrict p2, + const unsigned long *__restrict p3, + const unsigned long *__restrict p4, + const unsigned long
*__restrict p5) +{ + kernel_rvv_begin(); + xor_regs_5_(bytes, p1, p2, p3, p4, p5); + kernel_rvv_end(); +} +/* Template named "rvv" that routes do_2..do_5 to the wrappers above. */ +static struct xor_block_template xor_block_rvv = { + .name = "rvv", + .do_2 = xor_rvv_2, + .do_3 = xor_rvv_3, + .do_4 = xor_rvv_4, + .do_5 = xor_rvv_5 +}; +/* Also pass the rvv template to xor_speed() when has_vector() is true. */ +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_32regs); \ + if (has_vector()) { \ + xor_speed(&xor_block_rvv);\ + } \ + } while (0) +#endif diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 26cb2502ecf8..494f9cd1a00c 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -11,3 +11,4 @@ lib-$(CONFIG_64BIT) += tishift.o lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o +lib-$(CONFIG_RISCV_ISA_V) += xor.o diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S new file mode 100644 index 000000000000..3bc059e18171 --- /dev/null +++ b/arch/riscv/lib/xor.S @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2021 SiFive + */ +#include <linux/linkage.h> +#include <asm-generic/export.h> +#include <asm/asm.h> +/* a0: bytes left, a1: p1 (in/out), a2..: sources; vsetvli sets step size. */ +SYM_FUNC_START(xor_regs_2_) + vsetvli a3, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a3 + vxor.vv v16, v0, v8 + add a2, a2, a3 + vse8.v v16, (a1) + add a1, a1, a3 + bnez a0, xor_regs_2_ + ret +SYM_FUNC_END(xor_regs_2_) +EXPORT_SYMBOL(xor_regs_2_) + +SYM_FUNC_START(xor_regs_3_) + vsetvli a4, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a4 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a4 + vxor.vv v16, v0, v16 + add a3, a3, a4 + vse8.v v16, (a1) + add a1, a1, a4 + bnez a0, xor_regs_3_ + ret +SYM_FUNC_END(xor_regs_3_) +EXPORT_SYMBOL(xor_regs_3_) + +SYM_FUNC_START(xor_regs_4_) + vsetvli a5, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a5 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a5 + vxor.vv v0, v0, v16 + vle8.v v24, (a4) + add a3, a3, a5 + vxor.vv v16, v0, v24 + add
a4, a4, a5 + vse8.v v16, (a1) + add a1, a1, a5 + bnez a0, xor_regs_4_ + ret +SYM_FUNC_END(xor_regs_4_) +EXPORT_SYMBOL(xor_regs_4_) + +SYM_FUNC_START(xor_regs_5_) + vsetvli a6, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a6 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a6 + vxor.vv v0, v0, v16 + vle8.v v24, (a4) + add a3, a3, a6 + vxor.vv v0, v0, v24 + vle8.v v8, (a5) + add a4, a4, a6 + vxor.vv v16, v0, v8 + add a5, a5, a6 + vse8.v v16, (a1) + add a1, a1, a6 + bnez a0, xor_regs_5_ + ret +SYM_FUNC_END(xor_regs_5_) +EXPORT_SYMBOL(xor_regs_5_)