lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20251210-profiles-v1-10-315a6ff2ca5a@gmail.com>
Date: Wed, 10 Dec 2025 08:13:47 -0800
From: Charlie Jenkins <charlie@...osinc.com>
To: Paul Walmsley <pjw@...nel.org>, Palmer Dabbelt <palmer@...belt.com>, 
 Alexandre Ghiti <alex@...ti.fr>, Anup Patel <anup@...infault.org>, 
 Atish Patra <atish.patra@...ux.dev>, 
 Samuel Holland <samuel.holland@...ive.com>, 
 Björn Töpel <bjorn@...nel.org>, 
 Luke Nelson <luke.r.nels@...il.com>, Xi Wang <xi.wang@...il.com>, 
 Eric Biggers <ebiggers@...nel.org>, Conor Dooley <conor@...nel.org>
Cc: linux-riscv@...ts.infradead.org, linux-kernel@...r.kernel.org, 
 Charlie Jenkins <thecharlesjenkins@...il.com>
Subject: [PATCH RFC 10/10] riscv: csum: Remove inline assembly

When the kernel is built with Zbb enabled by default, the compiler
generates better code than is possible with the inline assembly.
Removing the inline assembly greatly simplifies the checksumming code
and improves performance when Zbb is enabled. However, performance
will decrease on kernels where Zbb is only discovered at runtime.
Moving towards this performance model of optimizing for compiled-in
extensions will help keep the kernel code from spinning out of
control given the large number of extensions available for RISC-V.

Signed-off-by: Charlie Jenkins <thecharlesjenkins@...il.com>
---
 arch/riscv/include/asm/checksum.h | 32 -------------
 arch/riscv/lib/csum.c             | 94 ---------------------------------------
 2 files changed, 126 deletions(-)

diff --git a/arch/riscv/include/asm/checksum.h b/arch/riscv/include/asm/checksum.h
index e747af23eea2..ecc4779209b9 100644
--- a/arch/riscv/include/asm/checksum.h
+++ b/arch/riscv/include/asm/checksum.h
@@ -45,38 +45,6 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
 			csum += csum < ((const unsigned int *)iph)[pos];
 	} while (++pos < ihl);
 
-	/*
-	 * ZBB only saves three instructions on 32-bit and five on 64-bit so not
-	 * worth checking if supported without Alternatives.
-	 */
-	if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
-	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB) &&
-	    riscv_has_extension_likely(ZBB)) {
-		unsigned long fold_temp;
-
-		if (IS_ENABLED(CONFIG_32BIT)) {
-			asm(".option push				\n\
-			.option arch,+zbb				\n\
-				not	%[fold_temp], %[csum]		\n\
-				rori	%[csum], %[csum], 16		\n\
-				sub	%[csum], %[fold_temp], %[csum]	\n\
-			.option pop"
-			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp));
-		} else {
-			asm(".option push				\n\
-			.option arch,+zbb				\n\
-				rori	%[fold_temp], %[csum], 32	\n\
-				add	%[csum], %[fold_temp], %[csum]	\n\
-				srli	%[csum], %[csum], 32		\n\
-				not	%[fold_temp], %[csum]		\n\
-				roriw	%[csum], %[csum], 16		\n\
-				subw	%[csum], %[fold_temp], %[csum]	\n\
-			.option pop"
-			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp));
-		}
-		return (__force __sum16)(csum >> 16);
-	}
-
 #ifndef CONFIG_32BIT
 	csum += ror64(csum, 32);
 	csum >>= 32;
diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c
index 4db35dd698eb..93c073f2b883 100644
--- a/arch/riscv/lib/csum.c
+++ b/arch/riscv/lib/csum.c
@@ -40,24 +40,6 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 	uproto = (__force unsigned int)htonl(proto);
 	sum += uproto;
 
-	if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
-	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB) &&
-	    riscv_has_extension_likely(ZBB)) {
-		unsigned long fold_temp;
-
-		asm(".option push					\n\
-		.option arch,+zbb					\n\
-			rori	%[fold_temp], %[sum], 32		\n\
-			add	%[sum], %[fold_temp], %[sum]		\n\
-			srli	%[sum], %[sum], 32			\n\
-			not	%[fold_temp], %[sum]			\n\
-			roriw	%[sum], %[sum], 16			\n\
-			subw	%[sum], %[fold_temp], %[sum]		\n\
-		.option pop"
-		: [sum] "+r" (sum), [fold_temp] "=&r" (fold_temp));
-		return (__force __sum16)(sum >> 16);
-	}
-
 	sum += ror64(sum, 32);
 	sum >>= 32;
 	return csum_fold((__force __wsum)sum);
@@ -142,51 +124,6 @@ do_csum_with_alignment(const unsigned char *buff, int len)
 	end = (const unsigned long *)(buff + len);
 	csum = do_csum_common(ptr, end, data);
 
-#ifdef CC_HAS_ASM_GOTO_TIED_OUTPUT
-	if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
-	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB) &&
-	    riscv_has_extension_likely(ZBB)) {
-		unsigned long fold_temp;
-
-#ifdef CONFIG_32BIT
-		asm_goto_output(".option push			\n\
-		.option arch,+zbb				\n\
-			rori	%[fold_temp], %[csum], 16	\n\
-			andi	%[offset], %[offset], 1		\n\
-			add	%[csum], %[fold_temp], %[csum]	\n\
-			beq	%[offset], zero, %l[end]	\n\
-			rev8	%[csum], %[csum]		\n\
-		.option pop"
-			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
-			: [offset] "r" (offset)
-			:
-			: end);
-
-		return (unsigned short)csum;
-#else /* !CONFIG_32BIT */
-		asm_goto_output(".option push			\n\
-		.option arch,+zbb				\n\
-			rori	%[fold_temp], %[csum], 32	\n\
-			add	%[csum], %[fold_temp], %[csum]	\n\
-			srli	%[csum], %[csum], 32		\n\
-			roriw	%[fold_temp], %[csum], 16	\n\
-			addw	%[csum], %[fold_temp], %[csum]	\n\
-			andi	%[offset], %[offset], 1		\n\
-			beq	%[offset], zero, %l[end]	\n\
-			rev8	%[csum], %[csum]		\n\
-		.option pop"
-			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
-			: [offset] "r" (offset)
-			:
-			: end);
-
-		return (csum << 16) >> 48;
-#endif /* !CONFIG_32BIT */
-end:
-		return csum >> 16;
-	}
-
-#endif /* CC_HAS_ASM_GOTO_TIED_OUTPUT */
 #ifndef CONFIG_32BIT
 	csum += ror64(csum, 32);
 	csum >>= 32;
@@ -215,37 +152,6 @@ do_csum_no_alignment(const unsigned char *buff, int len)
 	end = (const unsigned long *)(buff + len);
 	csum = do_csum_common(ptr, end, data);
 
-	if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
-	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB) &&
-	    riscv_has_extension_likely(ZBB)) {
-		unsigned long fold_temp;
-
-#ifdef CONFIG_32BIT
-		asm (".option push				\n\
-		.option arch,+zbb				\n\
-			rori	%[fold_temp], %[csum], 16	\n\
-			add	%[csum], %[fold_temp], %[csum]	\n\
-		.option pop"
-			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
-			:
-			: );
-
-#else /* !CONFIG_32BIT */
-		asm (".option push				\n\
-		.option arch,+zbb				\n\
-			rori	%[fold_temp], %[csum], 32	\n\
-			add	%[csum], %[fold_temp], %[csum]	\n\
-			srli	%[csum], %[csum], 32		\n\
-			roriw	%[fold_temp], %[csum], 16	\n\
-			addw	%[csum], %[fold_temp], %[csum]	\n\
-		.option pop"
-			: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
-			:
-			: );
-#endif /* !CONFIG_32BIT */
-		return csum >> 16;
-	}
-
 #ifndef CONFIG_32BIT
 	csum += ror64(csum, 32);
 	csum >>= 32;

-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ