
[13/17] riscv: Add vector extension XOR implementation

Message ID: CAM2SziUvhVTvD2TcDSVUx2CiW9deEb1V8G56e_7avcdWsUomzw@mail.gmail.com (mailing list archive)
State: Superseded
Series: Prctl to enable vector commands, previous vector patches rebased

Commit Message

Chris Stillson Sept. 21, 2022, 4:49 p.m. UTC
This patch adds a vector-optimized XOR implementation. It has been
tested in QEMU.

Co-developed-by: Han-Kuan Chen <hankuan.chen@sifive.com>
Signed-off-by: Han-Kuan Chen <hankuan.chen@sifive.com>
Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
---
 arch/riscv/include/asm/xor.h | 82 ++++++++++++++++++++++++++++++++++++
 arch/riscv/lib/Makefile      |  1 +
 arch/riscv/lib/xor.S         | 81 +++++++++++++++++++++++++++++++++++
 3 files changed, 164 insertions(+)
 create mode 100644 arch/riscv/include/asm/xor.h
 create mode 100644 arch/riscv/lib/xor.S

--
2.25.1
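
For context: the kernel's RAID XOR layer benchmarks every template
offered through XOR_TRY_TEMPLATES at boot and keeps the fastest;
consumers then reach the winning implementation through xor_blocks()
in crypto/xor.c. A minimal sketch of such a consumer (illustrative
only; the function and buffer names here are hypothetical, not part
of the patch):

/* xor_blocks() dispatches to whichever template won the boot-time
 * benchmark -- with this patch applied and V hardware present, that
 * can be the "rvv" template added below. */
#include <linux/raid/xor.h>

static void parity_update(void *dest, void *src, unsigned int bytes)
{
        void *srcs[] = { src };

        xor_blocks(1, bytes, dest, srcs);       /* dest ^= src */
}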

Patch

diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h
new file mode 100644
index 000000000000..d1f2eeb14afb
--- /dev/null
+++ b/arch/riscv/include/asm/xor.h
@@ -0,0 +1,82 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+
+#include <linux/hardirq.h>
+#include <asm-generic/xor.h>
+#ifdef CONFIG_VECTOR
+#include <asm/vector.h>
+#include <asm/switch_to.h>
+
+void xor_regs_2_(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2);
+void xor_regs_3_(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2,
+                const unsigned long * __restrict p3);
+void xor_regs_4_(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2,
+                const unsigned long * __restrict p3,
+                const unsigned long * __restrict p4);
+void xor_regs_5_(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2,
+                const unsigned long * __restrict p3,
+                const unsigned long * __restrict p4,
+                const unsigned long * __restrict p5);
+
+static void xor_rvv_2(unsigned long bytes, unsigned long * __restrict p1,
+                     const unsigned long * __restrict p2)
+{
+       kernel_rvv_begin();
+       xor_regs_2_(bytes, p1, p2);
+       kernel_rvv_end();
+}
+
+static void xor_rvv_3(unsigned long bytes, unsigned long * __restrict p1,
+                     const unsigned long * __restrict p2,
+                     const unsigned long * __restrict p3)
+{
+       kernel_rvv_begin();
+       xor_regs_3_(bytes, p1, p2, p3);
+       kernel_rvv_end();
+}
+
+static void xor_rvv_4(unsigned long bytes, unsigned long * __restrict p1,
+                     const unsigned long * __restrict p2,
+                     const unsigned long * __restrict p3,
+                     const unsigned long * __restrict p4)
+{
+       kernel_rvv_begin();
+       xor_regs_4_(bytes, p1, p2, p3, p4);
+       kernel_rvv_end();
+}
+
+static void xor_rvv_5(unsigned long bytes, unsigned long * __restrict p1,
+                     const unsigned long * __restrict p2,
+                     const unsigned long * __restrict p3,
+                     const unsigned long * __restrict p4,
+                     const unsigned long * __restrict p5)
+{
+       kernel_rvv_begin();
+       xor_regs_5_(bytes, p1, p2, p3, p4, p5);
+       kernel_rvv_end();
+}
+
+static struct xor_block_template xor_block_rvv = {
+       .name = "rvv",
+       .do_2 = xor_rvv_2,
+       .do_3 = xor_rvv_3,
+       .do_4 = xor_rvv_4,
+       .do_5 = xor_rvv_5
+};
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES                       \
+       do {                                    \
+               xor_speed(&xor_block_8regs);    \
+               xor_speed(&xor_block_32regs);   \
+               if (has_vector()) {             \
+                       xor_speed(&xor_block_rvv); \
+               }                               \
+       } while (0)
+#endif
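
The has_vector() check above keeps the "rvv" template out of the
benchmark on hardware without the V extension. For orientation, the
boot-time selection in crypto/xor.c has roughly this shape (a
simplified sketch, not the verbatim kernel code):

/* Each xor_speed() invocation inside XOR_TRY_TEMPLATES times one
 * template; the fastest one becomes the template that xor_blocks()
 * uses at runtime. */
static int __init calibrate_xor_blocks(void)
{
        fastest = NULL;
        XOR_TRY_TEMPLATES;      /* 8regs, 32regs, and rvv if available */
        active_template = fastest;
        return 0;
}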
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 25d5c9664e57..acd87ac86d24 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -7,3 +7,4 @@  lib-$(CONFIG_MMU)       += uaccess.o
 lib-$(CONFIG_64BIT)    += tishift.o

 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+lib-$(CONFIG_VECTOR)   += xor.o
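
All four assembly routines below share one strip-mined shape: vsetvli
sets vl to min(remaining bytes, what e8/m8 can cover) in a scratch
register, one pass xors vl bytes into the destination, every pointer
advances by vl, and the routine branches back to its entry label until
the byte count in a0 reaches zero. The byte-wise semantics of the
two-source case, as a plain C reference (illustration only, not kernel
code):

/* Scalar reference for xor_regs_2_: same effect, one byte at a time
 * instead of vl bytes per vector pass. */
void xor_regs_2_ref(unsigned long bytes, unsigned long *__restrict p1,
                    const unsigned long *__restrict p2)
{
        unsigned char *d = (unsigned char *)p1;
        const unsigned char *s = (const unsigned char *)p2;

        while (bytes--)
                *d++ ^= *s++;
}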
diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S
new file mode 100644
index 000000000000..3bc059e18171
--- /dev/null
+++ b/arch/riscv/lib/xor.S
@@ -0,0 +1,81 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+#include <linux/linkage.h>
+#include <asm-generic/export.h>
+#include <asm/asm.h>
+
+ENTRY(xor_regs_2_)
+       vsetvli a3, a0, e8, m8, ta, ma
+       vle8.v v0, (a1)
+       vle8.v v8, (a2)
+       sub a0, a0, a3
+       vxor.vv v16, v0, v8
+       add a2, a2, a3
+       vse8.v v16, (a1)
+       add a1, a1, a3
+       bnez a0, xor_regs_2_
+       ret
+END(xor_regs_2_)
+EXPORT_SYMBOL(xor_regs_2_)
+
+ENTRY(xor_regs_3_)
+       vsetvli a4, a0, e8, m8, ta, ma
+       vle8.v v0, (a1)
+       vle8.v v8, (a2)
+       sub a0, a0, a4
+       vxor.vv v0, v0, v8
+       vle8.v v16, (a3)
+       add a2, a2, a4
+       vxor.vv v16, v0, v16
+       add a3, a3, a4
+       vse8.v v16, (a1)
+       add a1, a1, a4
+       bnez a0, xor_regs_3_
+       ret
+END(xor_regs_3_)
+EXPORT_SYMBOL(xor_regs_3_)
+
+ENTRY(xor_regs_4_)
+       vsetvli a5, a0, e8, m8, ta, ma
+       vle8.v v0, (a1)
+       vle8.v v8, (a2)
+       sub a0, a0, a5
+       vxor.vv v0, v0, v8
+       vle8.v v16, (a3)
+       add a2, a2, a5
+       vxor.vv v0, v0, v16
+       vle8.v v24, (a4)
+       add a3, a3, a5
+       vxor.vv v16, v0, v24
+       add a4, a4, a5
+       vse8.v v16, (a1)
+       add a1, a1, a5
+       bnez a0, xor_regs_4_
+       ret
+END(xor_regs_4_)
+EXPORT_SYMBOL(xor_regs_4_)
+
+ENTRY(xor_regs_5_)
+       vsetvli a6, a0, e8, m8, ta, ma
+       vle8.v v0, (a1)
+       vle8.v v8, (a2)
+       sub a0, a0, a6
+       vxor.vv v0, v0, v8
+       vle8.v v16, (a3)
+       add a2, a2, a6
+       vxor.vv v0, v0, v16
+       vle8.v v24, (a4)
+       add a3, a3, a6
+       vxor.vv v0, v0, v24
+       vle8.v v8, (a5)
+       add a4, a4, a6
+       vxor.vv v16, v0, v8
+       add a5, a5, a6
+       vse8.v v16, (a1)
+       add a1, a1, a6
+       bnez a0, xor_regs_5_
+       ret
+END(xor_regs_5_)
+EXPORT_SYMBOL(xor_regs_5_)
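
The same two-source loop rendered with the RVV C intrinsics, for
readers who prefer C (a sketch under assumptions: intrinsic names
follow the __riscv_-prefixed intrinsics spec, older toolchains omit
the prefix, and the kernel carries hand-written assembly instead
because it is not built with the V extension enabled in the compiler):

#include <riscv_vector.h>
#include <stdint.h>
#include <stddef.h>

/* e8 elements with LMUL=8: each pass covers up to eight vector
 * registers' worth of bytes; vsetvl clamps vl to the remaining count. */
void xor_regs_2_c(size_t bytes, uint8_t *p1, const uint8_t *p2)
{
        while (bytes) {
                size_t vl = __riscv_vsetvl_e8m8(bytes);
                vuint8m8_t a = __riscv_vle8_v_u8m8(p1, vl);
                vuint8m8_t b = __riscv_vle8_v_u8m8(p2, vl);

                __riscv_vse8_v_u8m8(p1, __riscv_vxor_vv_u8m8(a, b, vl), vl);
                p1 += vl;
                p2 += vl;
                bytes -= vl;
        }
}

Grouping eight registers (m8) maximizes bytes moved per iteration,
which is why the assembly only ever names v0, v8, v16 and v24; the
scalar pointer bumps are interleaved between the vector instructions,
presumably so in-order cores can overlap the address arithmetic with
the vector loads and stores.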