[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230826-optimize_checksum-v1-1-937501b4522a@rivosinc.com>
Date: Sat, 26 Aug 2023 18:26:06 -0700
From: Charlie Jenkins <charlie@...osinc.com>
To: linux-riscv@...ts.infradead.org, linux-kernel@...r.kernel.org
Cc: Paul Walmsley <paul.walmsley@...ive.com>,
Palmer Dabbelt <palmer@...belt.com>,
Albert Ou <aou@...s.berkeley.edu>,
Charlie Jenkins <charlie@...osinc.com>
Subject: [PATCH 1/5] riscv: Checksum header
Provide checksum algorithms that have been designed to leverage RISC-V
instructions such as rotate. On 64-bit, the code can take advantage of the
larger registers to avoid some overflow checking.
Add a configuration option for the Zba extension and add the Zba and Zbb
extensions to the -march string.
Signed-off-by: Charlie Jenkins <charlie@...osinc.com>
---
arch/riscv/Kconfig | 23 +++++++++++
arch/riscv/Makefile | 2 +
arch/riscv/include/asm/checksum.h | 86 +++++++++++++++++++++++++++++++++++++++
3 files changed, 111 insertions(+)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 4c07b9189c86..8d7e475ca28d 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -507,6 +507,29 @@ config RISCV_ISA_V_DEFAULT_ENABLE
If you don't know what to do here, say Y.
+config TOOLCHAIN_HAS_ZBA
+ bool
+ default y
+ depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zba)
+ depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zba)
+ depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
+ depends on AS_HAS_OPTION_ARCH
+
+config RISCV_ISA_ZBA
+ bool "Zba extension support for bit manipulation instructions"
+ depends on TOOLCHAIN_HAS_ZBA
+ depends on MMU
+ depends on RISCV_ALTERNATIVE
+ default y
+ help
+ Adds support to dynamically detect the presence of the Zba
+ extension (address generation) and enable its usage.
+
+ The Zba extension provides shift-and-add instructions that
+ accelerate the generation of addresses for indexing into arrays
+ of basic types.
+
+ If you don't know what to do here, say Y.
+
config TOOLCHAIN_HAS_ZBB
bool
default y
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 6ec6d52a4180..51fa3f67fc9a 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -61,6 +61,8 @@ riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima
riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd
riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c
riscv-march-$(CONFIG_RISCV_ISA_V) := $(riscv-march-y)v
+# Append the Zba and Zbb extension suffixes to the march string when the
+# corresponding options are enabled.
+# NOTE(review): if riscv-march-y ends up in -march for the whole kernel, the
+# compiler may emit Zba/Zbb instructions anywhere, not only in code selected
+# at runtime - confirm this matches the "dynamically detect" wording of the
+# RISCV_ISA_ZBA help text.
+riscv-march-$(CONFIG_RISCV_ISA_ZBA) := $(riscv-march-y)_zba
+riscv-march-$(CONFIG_RISCV_ISA_ZBB) := $(riscv-march-y)_zbb
ifdef CONFIG_TOOLCHAIN_NEEDS_OLD_ISA_SPEC
KBUILD_CFLAGS += -Wa,-misa-spec=2.2
diff --git a/arch/riscv/include/asm/checksum.h b/arch/riscv/include/asm/checksum.h
new file mode 100644
index 000000000000..cd98f8cde888
--- /dev/null
+++ b/arch/riscv/include/asm/checksum.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * IP checksum routines
+ *
+ * Copyright (C) 2023 Rivos Inc.
+ */
+#ifndef __ASM_RISCV_CHECKSUM_H
+#define __ASM_RISCV_CHECKSUM_H
+
+#include <linux/in6.h>
+#include <linux/uaccess.h>
+
+/*
+ * The default csum_ipv6_magic() is sufficient for 32 bit, so an
+ * architecture-specific version is only declared for 64 bit. It is declared
+ * here but not defined in this header.
+ * NOTE(review): _HAVE_ARCH_IPV6_CSUM presumably tells the generic IPv6
+ * checksum header to skip its own definition - confirm.
+ */
+#ifdef CONFIG_64BIT
+#define _HAVE_ARCH_IPV6_CSUM
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum sum);
+#endif
+
+/*
+ * Fold a 32-bit partial checksum down to 16 bits and complement it, without
+ * adding pseudo headers.
+ *
+ * @sum: 32-bit partial (unfolded) checksum.
+ *
+ * Returns the bitwise complement of the folded 16-bit one's-complement sum.
+ */
+static inline __sum16 csum_fold(__wsum sum)
+{
+	/* Do the arithmetic on a plain integer rather than on __wsum itself. */
+	unsigned int folded = (__force unsigned int)sum;
+
+	/*
+	 * Adding the half-swapped value leaves low + high, plus the
+	 * end-around carry, in the upper 16 bits.
+	 */
+	folded += (folded >> 16) | (folded << 16);
+	return (__force __sum16)(~(folded >> 16));
+}
+
+#define csum_fold csum_fold
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ * Optimized for 32 and 64 bit platforms, with and without vector, with and
+ * without the bitmanip extensions zba/zbb.
+ */
+#ifdef CONFIG_32BIT
+/*
+ * 32-bit variant: sum @ihl 32-bit words starting at @iph with end-around
+ * carry, then fold the result to 16 bits with csum_fold().
+ *
+ * @iph: start of the header; read as an array of 32-bit words.
+ * @ihl: number of 32-bit words to sum. The first word is always read, even
+ *       when @ihl is 0.
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+	const unsigned int *words = iph;
+	unsigned int csum = 0;
+	unsigned int pos = 0;
+
+	do {
+		unsigned int word = words[pos];
+
+		csum += word;
+		/* The addition wrapped iff the result is below the addend. */
+		csum += csum < word;
+	} while (++pos < ihl);
+
+	return csum_fold((__force __wsum)csum);
+}
+#else
+
+/*
+ * 64-bit variant: accumulate the header's 32-bit words in a 64-bit register,
+ * where they cannot overflow, then fold 64 -> 32 -> 16 bits.
+ *
+ * Only 32-bit loads are used, so @iph needs no more than the 32-bit alignment
+ * an IPv4 header is assumed to have.
+ *
+ * @iph: start of the header, 32-bit aligned.
+ * @ihl: number of 32-bit words to sum (at least 5 for a valid IPv4 header).
+ *       The first word is always read, even when @ihl is 0.
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+	const unsigned int *words = iph;
+	unsigned long csum = 0;
+	unsigned int pos = 0;
+
+	/* No carry check needed: 32-bit addends cannot wrap a 64-bit sum. */
+	do {
+		csum += words[pos];
+	} while (++pos < ihl);
+
+	/*
+	 * Adding the half-swapped value leaves the 32-bit sum, including the
+	 * end-around carry, in the upper half.
+	 */
+	csum += (csum >> 32) | (csum << 32);
+	return csum_fold((__force __wsum)(csum >> 32));
+}
+#endif
+#define ip_fast_csum ip_fast_csum
+
+/*
+ * On 64 bit, do_csum() is declared here and defined elsewhere. The
+ * self-referential #define makes the name visible to #ifdef checks.
+ * NOTE(review): presumably this lets the generic checksum code included
+ * below skip its own do_csum() - confirm.
+ */
+#ifdef CONFIG_64BIT
+extern unsigned int do_csum(const unsigned char *buff, int len);
+#define do_csum do_csum
+#endif
+
+#include <asm-generic/checksum.h>
+
+#endif
--
2.41.0
Powered by blists - more mailing lists