[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20221116041342.3841-5-elliott@hpe.com>
Date: Tue, 15 Nov 2022 22:13:22 -0600
From: Robert Elliott <elliott@....com>
To: herbert@...dor.apana.org.au, davem@...emloft.net,
tim.c.chen@...ux.intel.com, ap420073@...il.com, ardb@...nel.org,
Jason@...c4.com, David.Laight@...LAB.COM, ebiggers@...nel.org,
linux-crypto@...r.kernel.org, linux-kernel@...r.kernel.org
Cc: Robert Elliott <elliott@....com>
Subject: [PATCH v4 04/24] crypto: x86/sha - limit FPU preemption
Limit the number of bytes processed between kernel_fpu_begin() and
kernel_fpu_end() calls.
Those functions call preempt_disable() and preempt_enable() respectively,
so the CPU core is unavailable for scheduling while the code between
them is running.
This leads to "rcu_preempt detected expedited stalls" with stack dumps
pointing to the optimized hash function if the module is loaded and
used a lot:
rcu: INFO: rcu_preempt detected expedited stalls on CPUs/tasks: ...
For example, that can occur during boot with the stack trace pointing
to the sha512-x86 function if the system is set to use SHA-512 for
module signing. The call trace includes:
module_sig_check
mod_verify_sig
pkcs7_verify
pkcs7_digest
sha512_finup
sha512_base_do_update
Fixes: 66be89515888 ("crypto: sha1 - SSSE3 based SHA1 implementation for x86-64")
Fixes: 8275d1aa6422 ("crypto: sha256 - Create module providing optimized SHA256 routines using SSSE3, AVX or AVX2 instructions.")
Fixes: 87de4579f92d ("crypto: sha512 - Create module providing optimized SHA512 routines using SSSE3, AVX or AVX2 instructions.")
Fixes: aa031b8f702e ("crypto: x86/sha512 - load based on CPU features")
Suggested-by: Herbert Xu <herbert@...dor.apana.org.au>
Reviewed-by: Tim Chen <tim.c.chen@...ux.intel.com>
Signed-off-by: Robert Elliott <elliott@....com>
---
v3 simplify to while loops rather than do..while loops, avoid
redundant checks for zero length, rename the limit macro and
change into a const, vary the limit for each algo
---
arch/x86/crypto/sha1_ssse3_glue.c | 64 ++++++++++++++++++++++-------
arch/x86/crypto/sha256_ssse3_glue.c | 64 ++++++++++++++++++++++-------
arch/x86/crypto/sha512_ssse3_glue.c | 55 +++++++++++++++++++------
3 files changed, 140 insertions(+), 43 deletions(-)
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 44340a1139e0..4bc77c84b0fb 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -26,8 +26,17 @@
#include <crypto/sha1_base.h>
#include <asm/simd.h>
+/* avoid kernel_fpu_begin/end scheduler/rcu stalls */
+#ifdef CONFIG_AS_SHA1_NI
+static const unsigned int bytes_per_fpu_shani = 34 * 1024;
+#endif
+static const unsigned int bytes_per_fpu_avx2 = 34 * 1024;
+static const unsigned int bytes_per_fpu_avx = 30 * 1024;
+static const unsigned int bytes_per_fpu_ssse3 = 26 * 1024;
+
static int sha1_update(struct shash_desc *desc, const u8 *data,
- unsigned int len, sha1_block_fn *sha1_xform)
+ unsigned int len, unsigned int bytes_per_fpu,
+ sha1_block_fn *sha1_xform)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
@@ -41,22 +50,39 @@ static int sha1_update(struct shash_desc *desc, const u8 *data,
*/
BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
- kernel_fpu_begin();
- sha1_base_do_update(desc, data, len, sha1_xform);
- kernel_fpu_end();
+ while (len) {
+ unsigned int chunk = min(len, bytes_per_fpu);
+
+ kernel_fpu_begin();
+ sha1_base_do_update(desc, data, chunk, sha1_xform);
+ kernel_fpu_end();
+
+ len -= chunk;
+ data += chunk;
+ }
return 0;
}
static int sha1_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out, sha1_block_fn *sha1_xform)
+ unsigned int len, unsigned int bytes_per_fpu,
+ u8 *out, sha1_block_fn *sha1_xform)
{
if (!crypto_simd_usable())
return crypto_sha1_finup(desc, data, len, out);
+ while (len) {
+ unsigned int chunk = min(len, bytes_per_fpu);
+
+ kernel_fpu_begin();
+ sha1_base_do_update(desc, data, chunk, sha1_xform);
+ kernel_fpu_end();
+
+ len -= chunk;
+ data += chunk;
+ }
+
kernel_fpu_begin();
- if (len)
- sha1_base_do_update(desc, data, len, sha1_xform);
sha1_base_do_finalize(desc, sha1_xform);
kernel_fpu_end();
@@ -69,13 +95,15 @@ asmlinkage void sha1_transform_ssse3(struct sha1_state *state,
static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- return sha1_update(desc, data, len, sha1_transform_ssse3);
+ return sha1_update(desc, data, len, bytes_per_fpu_ssse3,
+ sha1_transform_ssse3);
}
static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- return sha1_finup(desc, data, len, out, sha1_transform_ssse3);
+ return sha1_finup(desc, data, len, bytes_per_fpu_ssse3, out,
+ sha1_transform_ssse3);
}
/* Add padding and return the message digest. */
@@ -119,13 +147,15 @@ asmlinkage void sha1_transform_avx(struct sha1_state *state,
static int sha1_avx_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- return sha1_update(desc, data, len, sha1_transform_avx);
+ return sha1_update(desc, data, len, bytes_per_fpu_avx,
+ sha1_transform_avx);
}
static int sha1_avx_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- return sha1_finup(desc, data, len, out, sha1_transform_avx);
+ return sha1_finup(desc, data, len, bytes_per_fpu_avx, out,
+ sha1_transform_avx);
}
static int sha1_avx_final(struct shash_desc *desc, u8 *out)
@@ -201,13 +231,15 @@ static void sha1_apply_transform_avx2(struct sha1_state *state,
static int sha1_avx2_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- return sha1_update(desc, data, len, sha1_apply_transform_avx2);
+ return sha1_update(desc, data, len, bytes_per_fpu_avx2,
+ sha1_apply_transform_avx2);
}
static int sha1_avx2_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- return sha1_finup(desc, data, len, out, sha1_apply_transform_avx2);
+ return sha1_finup(desc, data, len, bytes_per_fpu_avx2, out,
+ sha1_apply_transform_avx2);
}
static int sha1_avx2_final(struct shash_desc *desc, u8 *out)
@@ -251,13 +283,15 @@ asmlinkage void sha1_ni_transform(struct sha1_state *digest, const u8 *data,
static int sha1_ni_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- return sha1_update(desc, data, len, sha1_ni_transform);
+ return sha1_update(desc, data, len, bytes_per_fpu_shani,
+ sha1_ni_transform);
}
static int sha1_ni_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- return sha1_finup(desc, data, len, out, sha1_ni_transform);
+ return sha1_finup(desc, data, len, bytes_per_fpu_shani, out,
+ sha1_ni_transform);
}
static int sha1_ni_final(struct shash_desc *desc, u8 *out)
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 3a5f6be7dbba..cdcdf5a80ffe 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -40,11 +40,20 @@
#include <linux/string.h>
#include <asm/simd.h>
+/* avoid kernel_fpu_begin/end scheduler/rcu stalls */
+#ifdef CONFIG_AS_SHA256_NI
+static const unsigned int bytes_per_fpu_shani = 13 * 1024;
+#endif
+static const unsigned int bytes_per_fpu_avx2 = 13 * 1024;
+static const unsigned int bytes_per_fpu_avx = 11 * 1024;
+static const unsigned int bytes_per_fpu_ssse3 = 11 * 1024;
+
asmlinkage void sha256_transform_ssse3(struct sha256_state *state,
const u8 *data, int blocks);
static int _sha256_update(struct shash_desc *desc, const u8 *data,
- unsigned int len, sha256_block_fn *sha256_xform)
+ unsigned int len, unsigned int bytes_per_fpu,
+ sha256_block_fn *sha256_xform)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
@@ -58,22 +67,39 @@ static int _sha256_update(struct shash_desc *desc, const u8 *data,
*/
BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
- kernel_fpu_begin();
- sha256_base_do_update(desc, data, len, sha256_xform);
- kernel_fpu_end();
+ while (len) {
+ unsigned int chunk = min(len, bytes_per_fpu);
+
+ kernel_fpu_begin();
+ sha256_base_do_update(desc, data, chunk, sha256_xform);
+ kernel_fpu_end();
+
+ len -= chunk;
+ data += chunk;
+ }
return 0;
}
static int sha256_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out, sha256_block_fn *sha256_xform)
+ unsigned int len, unsigned int bytes_per_fpu,
+ u8 *out, sha256_block_fn *sha256_xform)
{
if (!crypto_simd_usable())
return crypto_sha256_finup(desc, data, len, out);
+ while (len) {
+ unsigned int chunk = min(len, bytes_per_fpu);
+
+ kernel_fpu_begin();
+ sha256_base_do_update(desc, data, chunk, sha256_xform);
+ kernel_fpu_end();
+
+ len -= chunk;
+ data += chunk;
+ }
+
kernel_fpu_begin();
- if (len)
- sha256_base_do_update(desc, data, len, sha256_xform);
sha256_base_do_finalize(desc, sha256_xform);
kernel_fpu_end();
@@ -83,13 +109,15 @@ static int sha256_finup(struct shash_desc *desc, const u8 *data,
static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- return _sha256_update(desc, data, len, sha256_transform_ssse3);
+ return _sha256_update(desc, data, len, bytes_per_fpu_ssse3,
+ sha256_transform_ssse3);
}
static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- return sha256_finup(desc, data, len, out, sha256_transform_ssse3);
+ return sha256_finup(desc, data, len, bytes_per_fpu_ssse3,
+ out, sha256_transform_ssse3);
}
/* Add padding and return the message digest. */
@@ -149,13 +177,15 @@ asmlinkage void sha256_transform_avx(struct sha256_state *state,
static int sha256_avx_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- return _sha256_update(desc, data, len, sha256_transform_avx);
+ return _sha256_update(desc, data, len, bytes_per_fpu_avx,
+ sha256_transform_avx);
}
static int sha256_avx_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- return sha256_finup(desc, data, len, out, sha256_transform_avx);
+ return sha256_finup(desc, data, len, bytes_per_fpu_avx,
+ out, sha256_transform_avx);
}
static int sha256_avx_final(struct shash_desc *desc, u8 *out)
@@ -225,13 +255,15 @@ asmlinkage void sha256_transform_rorx(struct sha256_state *state,
static int sha256_avx2_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- return _sha256_update(desc, data, len, sha256_transform_rorx);
+ return _sha256_update(desc, data, len, bytes_per_fpu_avx2,
+ sha256_transform_rorx);
}
static int sha256_avx2_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- return sha256_finup(desc, data, len, out, sha256_transform_rorx);
+ return sha256_finup(desc, data, len, bytes_per_fpu_avx2,
+ out, sha256_transform_rorx);
}
static int sha256_avx2_final(struct shash_desc *desc, u8 *out)
@@ -300,13 +332,15 @@ asmlinkage void sha256_ni_transform(struct sha256_state *digest,
static int sha256_ni_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- return _sha256_update(desc, data, len, sha256_ni_transform);
+ return _sha256_update(desc, data, len, bytes_per_fpu_shani,
+ sha256_ni_transform);
}
static int sha256_ni_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- return sha256_finup(desc, data, len, out, sha256_ni_transform);
+ return sha256_finup(desc, data, len, bytes_per_fpu_shani,
+ out, sha256_ni_transform);
}
static int sha256_ni_final(struct shash_desc *desc, u8 *out)
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 6d3b85e53d0e..c7036cfe2a7e 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -39,11 +39,17 @@
#include <asm/cpu_device_id.h>
#include <asm/simd.h>
+/* avoid kernel_fpu_begin/end scheduler/rcu stalls */
+static const unsigned int bytes_per_fpu_avx2 = 20 * 1024;
+static const unsigned int bytes_per_fpu_avx = 17 * 1024;
+static const unsigned int bytes_per_fpu_ssse3 = 17 * 1024;
+
asmlinkage void sha512_transform_ssse3(struct sha512_state *state,
const u8 *data, int blocks);
static int sha512_update(struct shash_desc *desc, const u8 *data,
- unsigned int len, sha512_block_fn *sha512_xform)
+ unsigned int len, unsigned int bytes_per_fpu,
+ sha512_block_fn *sha512_xform)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
@@ -57,22 +63,39 @@ static int sha512_update(struct shash_desc *desc, const u8 *data,
*/
BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0);
- kernel_fpu_begin();
- sha512_base_do_update(desc, data, len, sha512_xform);
- kernel_fpu_end();
+ while (len) {
+ unsigned int chunk = min(len, bytes_per_fpu);
+
+ kernel_fpu_begin();
+ sha512_base_do_update(desc, data, chunk, sha512_xform);
+ kernel_fpu_end();
+
+ len -= chunk;
+ data += chunk;
+ }
return 0;
}
static int sha512_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out, sha512_block_fn *sha512_xform)
+ unsigned int len, unsigned int bytes_per_fpu,
+ u8 *out, sha512_block_fn *sha512_xform)
{
if (!crypto_simd_usable())
return crypto_sha512_finup(desc, data, len, out);
+ while (len) {
+ unsigned int chunk = min(len, bytes_per_fpu);
+
+ kernel_fpu_begin();
+ sha512_base_do_update(desc, data, chunk, sha512_xform);
+ kernel_fpu_end();
+
+ len -= chunk;
+ data += chunk;
+ }
+
kernel_fpu_begin();
- if (len)
- sha512_base_do_update(desc, data, len, sha512_xform);
sha512_base_do_finalize(desc, sha512_xform);
kernel_fpu_end();
@@ -82,13 +105,15 @@ static int sha512_finup(struct shash_desc *desc, const u8 *data,
static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- return sha512_update(desc, data, len, sha512_transform_ssse3);
+ return sha512_update(desc, data, len, bytes_per_fpu_ssse3,
+ sha512_transform_ssse3);
}
static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- return sha512_finup(desc, data, len, out, sha512_transform_ssse3);
+ return sha512_finup(desc, data, len, bytes_per_fpu_ssse3,
+ out, sha512_transform_ssse3);
}
/* Add padding and return the message digest. */
@@ -158,13 +183,15 @@ static bool avx_usable(void)
static int sha512_avx_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- return sha512_update(desc, data, len, sha512_transform_avx);
+ return sha512_update(desc, data, len, bytes_per_fpu_avx,
+ sha512_transform_avx);
}
static int sha512_avx_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- return sha512_finup(desc, data, len, out, sha512_transform_avx);
+ return sha512_finup(desc, data, len, bytes_per_fpu_avx,
+ out, sha512_transform_avx);
}
/* Add padding and return the message digest. */
@@ -224,13 +251,15 @@ asmlinkage void sha512_transform_rorx(struct sha512_state *state,
static int sha512_avx2_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- return sha512_update(desc, data, len, sha512_transform_rorx);
+ return sha512_update(desc, data, len, bytes_per_fpu_avx2,
+ sha512_transform_rorx);
}
static int sha512_avx2_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- return sha512_finup(desc, data, len, out, sha512_transform_rorx);
+ return sha512_finup(desc, data, len, bytes_per_fpu_avx2,
+ out, sha512_transform_rorx);
}
/* Add padding and return the message digest. */
--
2.38.1
Powered by blists - more mailing lists