lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed,  2 Nov 2022 23:27:32 -0500
From:   Robert Elliott <elliott@....com>
To:     herbert@...dor.apana.org.au, davem@...emloft.net,
        tim.c.chen@...ux.intel.com, ap420073@...il.com, ardb@...nel.org,
        Jason@...c4.com, David.Laight@...LAB.COM, ebiggers@...nel.org,
        linux-crypto@...r.kernel.org, linux-kernel@...r.kernel.org
Cc:     Robert Elliott <elliott@....com>
Subject: [PATCH v3 09/17] crypto: x86/ghash - limit FPU preemption

Limit the number of bytes processed between kernel_fpu_begin() and
kernel_fpu_end() calls.

Those functions call preempt_disable() and preempt_enable(), so
the CPU core is unavailable for scheduling while running, leading to:
    rcu: INFO: rcu_preempt detected expedited stalls on CPUs/tasks: ...

Fixes: 0e1227d356e9 ("crypto: ghash - Add PCLMULQDQ accelerated implementation")
Suggested-by: Herbert Xu <herbert@...dor.apana.org.au>
Signed-off-by: Robert Elliott <elliott@....com>

---
v3 change to static int, simplify while loop
---
 arch/x86/crypto/ghash-clmulni-intel_glue.c | 28 +++++++++++++++-------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 22367e363d72..0f24c3b23fd2 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -20,8 +20,11 @@
 #include <asm/cpu_device_id.h>
 #include <asm/simd.h>
 
-#define GHASH_BLOCK_SIZE	16
-#define GHASH_DIGEST_SIZE	16
+#define GHASH_BLOCK_SIZE	16U
+#define GHASH_DIGEST_SIZE	16U
+
+/* avoid kernel_fpu_begin/end scheduler/rcu stalls */
+static const unsigned int bytes_per_fpu = 50 * 1024;
 
 void clmul_ghash_mul(u8 *dst, const u128 *shash);
 
@@ -80,9 +83,11 @@ static int ghash_update(struct shash_desc *desc,
 	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
 	u8 *dst = dctx->buffer;
 
+	BUILD_BUG_ON(bytes_per_fpu < GHASH_BLOCK_SIZE);
+
 	if (dctx->bytes) {
 		int n = min(srclen, dctx->bytes);
-		u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
+		u8 *pos = dst + GHASH_BLOCK_SIZE - dctx->bytes;
 
 		dctx->bytes -= n;
 		srclen -= n;
@@ -97,13 +102,18 @@ static int ghash_update(struct shash_desc *desc,
 		}
 	}
 
-	kernel_fpu_begin();
-	clmul_ghash_update(dst, src, srclen, &ctx->shash);
-	kernel_fpu_end();
+	while (srclen >= GHASH_BLOCK_SIZE) {
+		unsigned int chunk = min(srclen, bytes_per_fpu);
+
+		kernel_fpu_begin();
+		clmul_ghash_update(dst, src, chunk, &ctx->shash);
+		kernel_fpu_end();
+
+		src += chunk & ~(GHASH_BLOCK_SIZE - 1);
+		srclen -= chunk & ~(GHASH_BLOCK_SIZE - 1);
+	}
 
-	if (srclen & 0xf) {
-		src += srclen - (srclen & 0xf);
-		srclen &= 0xf;
+	if (srclen) {
 		dctx->bytes = GHASH_BLOCK_SIZE - srclen;
 		while (srclen--)
 			*dst++ ^= *src++;
-- 
2.37.3

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ