[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <c6330b675e015d2e0beb6a750cfc1101f2ddd83f.1636362169.git.greentime.hu@sifive.com>
Date: Tue, 9 Nov 2021 17:48:27 +0800
From: Greentime Hu <greentime.hu@...ive.com>
To: palmer@...belt.com, paul.walmsley@...ive.com,
linux-riscv@...ts.infradead.org, linux-kernel@...r.kernel.org,
aou@...s.berkeley.edu
Subject: [PATCH v9 15/17] riscv: Add vector extension XOR implementation
This patch adds support for vector-optimized XOR. It has been tested in spike
and qemu.
Logs in spike:
[ 0.008365] xor: measuring software checksum speed
[ 0.048885] 8regs : 1719.000 MB/sec
[ 0.089080] 32regs : 1717.000 MB/sec
[ 0.129275] rvv : 7043.000 MB/sec
[ 0.129525] xor: using function: rvv (7043.000 MB/sec)
Logs in qemu:
[ 0.098943] xor: measuring software checksum speed
[ 0.139391] 8regs : 2911.000 MB/sec
[ 0.181079] 32regs : 2813.000 MB/sec
[ 0.224260] rvv : 45.000 MB/sec
[ 0.225586] xor: using function: 8regs (2911.000 MB/sec)
Co-developed-by: Han-Kuan Chen <hankuan.chen@...ive.com>
Signed-off-by: Han-Kuan Chen <hankuan.chen@...ive.com>
Signed-off-by: Greentime Hu <greentime.hu@...ive.com>
---
arch/riscv/include/asm/xor.h | 66 +++++++++++++++++++++++++++++
arch/riscv/lib/Makefile | 1 +
arch/riscv/lib/xor.S | 81 ++++++++++++++++++++++++++++++++++++
3 files changed, 148 insertions(+)
create mode 100644 arch/riscv/include/asm/xor.h
create mode 100644 arch/riscv/lib/xor.S
diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h
new file mode 100644
index 000000000000..5e86277364b5
--- /dev/null
+++ b/arch/riscv/include/asm/xor.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+
+#include <linux/hardirq.h>
+#include <asm-generic/xor.h>
+#ifdef CONFIG_VECTOR
+#include <asm/vector.h>
+#include <asm/switch_to.h>
+
+void xor_regs_2_(unsigned long bytes, unsigned long *p1, unsigned long *p2); /* asm, arch/riscv/lib/xor.S */
+void xor_regs_3_(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3);
+void xor_regs_4_(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3,
+ unsigned long *p4); /* in xor.S each routine XORs p2..pN into p1, `bytes` bytes in total */
+void xor_regs_5_(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3,
+ unsigned long *p4, unsigned long *p5);
+
+static void xor_rvv_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{ /* .do_2 handler of xor_block_rvv: runs the assembly routine xor_regs_2_ */
+ kernel_rvv_begin(); /* NOTE(review): presumably makes the vector unit usable in kernel context; confirm in asm/vector.h */
+ xor_regs_2_(bytes, p1, p2); /* per xor.S: p1 ^= p2 over `bytes` bytes */
+ kernel_rvv_end(); /* paired with the kernel_rvv_begin() call above */
+}
+
+static void xor_rvv_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3)
+{ /* .do_3 handler of xor_block_rvv: runs the assembly routine xor_regs_3_ */
+ kernel_rvv_begin(); /* NOTE(review): presumably makes the vector unit usable in kernel context; confirm in asm/vector.h */
+ xor_regs_3_(bytes, p1, p2, p3); /* per xor.S: p1 ^= p2 ^ p3 over `bytes` bytes */
+ kernel_rvv_end(); /* paired with the kernel_rvv_begin() call above */
+}
+
+static void xor_rvv_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3,
+ unsigned long *p4)
+{ /* .do_4 handler of xor_block_rvv: runs the assembly routine xor_regs_4_ */
+ kernel_rvv_begin(); /* NOTE(review): presumably makes the vector unit usable in kernel context; confirm in asm/vector.h */
+ xor_regs_4_(bytes, p1, p2, p3, p4); /* per xor.S: p1 ^= p2 ^ p3 ^ p4 over `bytes` bytes */
+ kernel_rvv_end(); /* paired with the kernel_rvv_begin() call above */
+}
+
+static void xor_rvv_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3,
+ unsigned long *p4, unsigned long *p5)
+{ /* .do_5 handler of xor_block_rvv: runs the assembly routine xor_regs_5_ */
+ kernel_rvv_begin(); /* NOTE(review): presumably makes the vector unit usable in kernel context; confirm in asm/vector.h */
+ xor_regs_5_(bytes, p1, p2, p3, p4, p5); /* per xor.S: p1 ^= p2 ^ p3 ^ p4 ^ p5 over `bytes` bytes */
+ kernel_rvv_end(); /* paired with the kernel_rvv_begin() call above */
+}
+
+static struct xor_block_template xor_block_rvv = { /* XOR template backed by the vector wrappers above */
+ .name = "rvv", /* label shown in the "xor:" speed log lines */
+ .do_2 = xor_rvv_2, /* p1 ^= p2 */
+ .do_3 = xor_rvv_3, /* p1 ^= p2 ^ p3 */
+ .do_4 = xor_rvv_4, /* p1 ^= p2 ^ p3 ^ p4 */
+ .do_5 = xor_rvv_5 /* p1 ^= p2 ^ p3 ^ p4 ^ p5 */
+};
+
+#undef XOR_TRY_TEMPLATES /* drop any earlier definition (presumably from asm-generic/xor.h; confirm) */
+#define XOR_TRY_TEMPLATES \
+ do { /* pass each candidate template to xor_speed() */ \
+ xor_speed(&xor_block_8regs); \
+ xor_speed(&xor_block_32regs); \
+ if (has_vector()) { /* offer the rvv template only when has_vector() is true */ \
+ xor_speed(&xor_block_rvv);\
+ } \
+ } while (0)
+#endif
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 25d5c9664e57..acd87ac86d24 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -7,3 +7,4 @@ lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+lib-$(CONFIG_VECTOR) += xor.o
diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S
new file mode 100644
index 000000000000..bb54e77df046
--- /dev/null
+++ b/arch/riscv/lib/xor.S
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+#include <linux/linkage.h>
+#include <asm-generic/export.h>
+#include <asm/asm.h>
+
+ENTRY(xor_regs_2_) /* a0 = bytes left, a1 = p1 (source and destination), a2 = p2 */
+ vsetvli a3, a0, e8, m8, ta, ma /* a3 = bytes handled this pass; agnostic policy is safe, only vl bytes are stored */
+ vle8.v v0, (a1) /* v0 = chunk of p1 */
+ vle8.v v8, (a2) /* v8 = chunk of p2 */
+ sub a0, a0, a3 /* bytes left -= chunk size */
+ vxor.vv v16, v0, v8 /* v16 = p1 ^ p2 */
+ add a2, a2, a3 /* advance p2 */
+ vse8.v v16, (a1) /* write the result over the p1 chunk */
+ add a1, a1, a3 /* advance p1 */
+ bnez a0, xor_regs_2_ /* bytes remain: loop back to the vsetvli at entry */
+ ret
+END(xor_regs_2_)
+EXPORT_SYMBOL(xor_regs_2_)
+
+ENTRY(xor_regs_3_) /* a0 = bytes left, a1 = p1 (source and destination), a2 = p2, a3 = p3 */
+ vsetvli a4, a0, e8, m8, ta, ma /* a4 = bytes handled this pass; agnostic policy is safe, only vl bytes are stored */
+ vle8.v v0, (a1) /* v0 = chunk of p1 */
+ vle8.v v8, (a2) /* v8 = chunk of p2 */
+ sub a0, a0, a4 /* bytes left -= chunk size */
+ vxor.vv v0, v0, v8 /* v0 = p1 ^ p2 */
+ vle8.v v16, (a3) /* v16 = chunk of p3 */
+ add a2, a2, a4 /* advance p2 */
+ vxor.vv v16, v0, v16 /* v16 = p1 ^ p2 ^ p3 */
+ add a3, a3, a4 /* advance p3 */
+ vse8.v v16, (a1) /* write the result over the p1 chunk */
+ add a1, a1, a4 /* advance p1 */
+ bnez a0, xor_regs_3_ /* bytes remain: loop back to the vsetvli at entry */
+ ret
+END(xor_regs_3_)
+EXPORT_SYMBOL(xor_regs_3_)
+
+ENTRY(xor_regs_4_) /* a0 = bytes left, a1 = p1 (source and destination), a2..a4 = p2..p4 */
+ vsetvli a5, a0, e8, m8, ta, ma /* a5 = bytes handled this pass; agnostic policy is safe, only vl bytes are stored */
+ vle8.v v0, (a1) /* v0 = chunk of p1 */
+ vle8.v v8, (a2) /* v8 = chunk of p2 */
+ sub a0, a0, a5 /* bytes left -= chunk size */
+ vxor.vv v0, v0, v8 /* v0 = p1 ^ p2 */
+ vle8.v v16, (a3) /* v16 = chunk of p3 */
+ add a2, a2, a5 /* advance p2 */
+ vxor.vv v0, v0, v16 /* v0 = p1 ^ p2 ^ p3 */
+ vle8.v v24, (a4) /* v24 = chunk of p4 */
+ add a3, a3, a5 /* advance p3 */
+ vxor.vv v16, v0, v24 /* v16 = p1 ^ p2 ^ p3 ^ p4 */
+ add a4, a4, a5 /* advance p4 */
+ vse8.v v16, (a1) /* write the result over the p1 chunk */
+ add a1, a1, a5 /* advance p1 */
+ bnez a0, xor_regs_4_ /* bytes remain: loop back to the vsetvli at entry */
+ ret
+END(xor_regs_4_)
+EXPORT_SYMBOL(xor_regs_4_)
+
+ENTRY(xor_regs_5_) /* a0 = bytes left, a1 = p1 (source and destination), a2..a5 = p2..p5 */
+ vsetvli a6, a0, e8, m8, ta, ma /* a6 = bytes handled this pass; agnostic policy is safe, only vl bytes are stored */
+ vle8.v v0, (a1) /* v0 = chunk of p1 */
+ vle8.v v8, (a2) /* v8 = chunk of p2 */
+ sub a0, a0, a6 /* bytes left -= chunk size */
+ vxor.vv v0, v0, v8 /* v0 = p1 ^ p2 */
+ vle8.v v16, (a3) /* v16 = chunk of p3 */
+ add a2, a2, a6 /* advance p2 */
+ vxor.vv v0, v0, v16 /* v0 = p1 ^ p2 ^ p3 */
+ vle8.v v24, (a4) /* v24 = chunk of p4 */
+ add a3, a3, a6 /* advance p3 */
+ vxor.vv v0, v0, v24 /* v0 = p1 ^ p2 ^ p3 ^ p4 */
+ vle8.v v8, (a5) /* v8 is reused for the chunk of p5 */
+ add a4, a4, a6 /* advance p4 */
+ vxor.vv v16, v0, v8 /* v16 = p1 ^ p2 ^ p3 ^ p4 ^ p5 */
+ add a5, a5, a6 /* advance p5 */
+ vse8.v v16, (a1) /* write the result over the p1 chunk */
+ add a1, a1, a6 /* advance p1 */
+ bnez a0, xor_regs_5_ /* bytes remain: loop back to the vsetvli at entry */
+ ret
+END(xor_regs_5_)
+EXPORT_SYMBOL(xor_regs_5_)
--
2.31.1
Powered by blists - more mailing lists