lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20221012215931.3896-20-elliott@hpe.com>
Date:   Wed, 12 Oct 2022 16:59:31 -0500
From:   Robert Elliott <elliott@....com>
To:     herbert@...dor.apana.org.au, davem@...emloft.net,
        tim.c.chen@...ux.intel.com, ap420073@...il.com, ardb@...nel.org,
        linux-crypto@...r.kernel.org, linux-kernel@...r.kernel.org
Cc:     Robert Elliott <elliott@....com>
Subject: [PATCH v2 19/19] crypto: x86/sha - register only the best function

Don't register and unregister each of the functions from least-
to most-optimized (SSSE3 then AVX then AVX2); determine the
most-optimized function and load only that version.

Suggested-by: Tim Chen <tim.c.chen@...ux.intel.com>
Signed-off-by: Robert Elliott <elliott@....com>
---
 arch/x86/crypto/sha1_ssse3_glue.c   | 139 ++++++++++++-------------
 arch/x86/crypto/sha256_ssse3_glue.c | 154 ++++++++++++++--------------
 arch/x86/crypto/sha512_ssse3_glue.c | 120 ++++++++++++----------
 3 files changed, 210 insertions(+), 203 deletions(-)

diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index edffc33bd12e..90a86d737bcf 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -123,17 +123,16 @@ static struct shash_alg sha1_ssse3_alg = {
 	}
 };
 
-static int register_sha1_ssse3(void)
-{
-	if (boot_cpu_has(X86_FEATURE_SSSE3))
-		return crypto_register_shash(&sha1_ssse3_alg);
-	return 0;
-}
-
+static bool sha1_ssse3_registered;
+static bool sha1_avx_registered;
+static bool sha1_avx2_registered;
+static bool sha1_ni_registered;
 static void unregister_sha1_ssse3(void)
 {
-	if (boot_cpu_has(X86_FEATURE_SSSE3))
+	if (sha1_ssse3_registered) {
 		crypto_unregister_shash(&sha1_ssse3_alg);
+		sha1_ssse3_registered = 0;
+	}
 }
 
 asmlinkage void sha1_transform_avx(struct sha1_state *state,
@@ -172,28 +171,12 @@ static struct shash_alg sha1_avx_alg = {
 	}
 };
 
-static bool avx_usable(void)
-{
-	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
-		if (boot_cpu_has(X86_FEATURE_AVX))
-			pr_info("AVX detected but unusable.\n");
-		return false;
-	}
-
-	return true;
-}
-
-static int register_sha1_avx(void)
-{
-	if (avx_usable())
-		return crypto_register_shash(&sha1_avx_alg);
-	return 0;
-}
-
 static void unregister_sha1_avx(void)
 {
-	if (avx_usable())
+	if (sha1_avx_registered) {
 		crypto_unregister_shash(&sha1_avx_alg);
+		sha1_avx_registered = 0;
+	}
 }
 
 #define SHA1_AVX2_BLOCK_OPTSIZE	4	/* optimal 4*64 bytes of SHA1 blocks */
@@ -201,16 +184,6 @@ static void unregister_sha1_avx(void)
 asmlinkage void sha1_transform_avx2(struct sha1_state *state,
 				    const u8 *data, int blocks);
 
-static bool avx2_usable(void)
-{
-	if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
-		&& boot_cpu_has(X86_FEATURE_BMI1)
-		&& boot_cpu_has(X86_FEATURE_BMI2))
-		return true;
-
-	return false;
-}
-
 static void sha1_apply_transform_avx2(struct sha1_state *state,
 				      const u8 *data, int blocks)
 {
@@ -254,17 +227,13 @@ static struct shash_alg sha1_avx2_alg = {
 	}
 };
 
-static int register_sha1_avx2(void)
-{
-	if (avx2_usable())
-		return crypto_register_shash(&sha1_avx2_alg);
-	return 0;
-}
 
 static void unregister_sha1_avx2(void)
 {
-	if (avx2_usable())
+	if (sha1_avx2_registered) {
 		crypto_unregister_shash(&sha1_avx2_alg);
+		sha1_avx2_registered = 0;
+	}
 }
 
 #ifdef CONFIG_AS_SHA1_NI
@@ -304,13 +273,6 @@ static struct shash_alg sha1_ni_alg = {
 	}
 };
 
-static int register_sha1_ni(void)
-{
-	if (boot_cpu_has(X86_FEATURE_SHA_NI))
-		return crypto_register_shash(&sha1_ni_alg);
-	return 0;
-}
-
 static const struct x86_cpu_id module_cpu_ids[] = {
 	X86_MATCH_FEATURE(X86_FEATURE_SHA_NI, NULL),
 	X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
@@ -322,44 +284,79 @@ MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
 
 static void unregister_sha1_ni(void)
 {
-	if (boot_cpu_has(X86_FEATURE_SHA_NI))
+	if (sha1_ni_registered) {
 		crypto_unregister_shash(&sha1_ni_alg);
+		sha1_ni_registered = 0;
+	}
 }
 
 #else
-static inline int register_sha1_ni(void) { return 0; }
 static inline void unregister_sha1_ni(void) { }
 #endif
 
 static int __init sha1_ssse3_mod_init(void)
 {
-	if (register_sha1_ssse3())
-		goto fail;
+	const char *feature_name;
+	const char *driver_name = NULL;
+	int ret;
 
 	if (!x86_match_cpu(module_cpu_ids))
 		return -ENODEV;
 
-	if (register_sha1_avx()) {
-		unregister_sha1_ssse3();
-		goto fail;
-	}
+	/* SHA-NI */
+	if (boot_cpu_has(X86_FEATURE_SHA_NI)) {
 
-	if (register_sha1_avx2()) {
-		unregister_sha1_avx();
-		unregister_sha1_ssse3();
-		goto fail;
-	}
+		ret = crypto_register_shash(&sha1_ni_alg);
+		if (!ret)
+			sha1_ni_registered = 1;
 
-	if (register_sha1_ni()) {
-		unregister_sha1_avx2();
-		unregister_sha1_avx();
-		unregister_sha1_ssse3();
-		goto fail;
+	/* AVX2 */
+	} else if (boot_cpu_has(X86_FEATURE_AVX2)) {
+
+		if (boot_cpu_has(X86_FEATURE_BMI1) &&
+		    boot_cpu_has(X86_FEATURE_BMI2)) {
+
+			ret = crypto_register_shash(&sha1_avx2_alg);
+			if (!ret) {
+				sha1_avx2_registered = 1;
+				driver_name = sha1_avx2_alg.base.cra_driver_name;
+			}
+		} else {
+			pr_info("AVX2-optimized version not engaged, all required features (AVX2, BMI1, BMI2) not supported\n");
+		}
+
+	/* AVX */
+	} else if (boot_cpu_has(X86_FEATURE_AVX)) {
+
+		if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+			       &feature_name)) {
+
+			ret = crypto_register_shash(&sha1_avx_alg);
+			if (!ret) {
+				sha1_avx_registered = 1;
+				driver_name = sha1_avx_alg.base.cra_driver_name;
+			}
+		} else {
+			pr_info("AVX-optimized version not engaged, CPU extended feature '%s' is not supported\n",
+				feature_name);
+		}
+
+	/* SSE3 */
+	} else if (boot_cpu_has(X86_FEATURE_SSSE3)) {
+		ret = crypto_register_shash(&sha1_ssse3_alg);
+		if (!ret) {
+			sha1_ssse3_registered = 1;
+			driver_name = sha1_ssse3_alg.base.cra_driver_name;
+		}
 	}
 
+	pr_info("CPU-optimized crypto module loaded (SSSE3=%s, AVX=%s, AVX2=%s, SHA-NI=%s): driver=%s\n",
+		sha1_ssse3_registered ? "yes" : "no",
+		sha1_avx_registered ? "yes" : "no",
+		sha1_avx2_registered ? "yes" : "no",
+		sha1_ni_registered ? "yes" : "no",
+		driver_name);
 	return 0;
-fail:
-	return -ENODEV;
 }
 
 static void __exit sha1_ssse3_mod_fini(void)
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 8a0fb308fbba..cd7bf2b48f3d 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -150,19 +150,18 @@ static struct shash_alg sha256_ssse3_algs[] = { {
 	}
 } };
 
-static int register_sha256_ssse3(void)
-{
-	if (boot_cpu_has(X86_FEATURE_SSSE3))
-		return crypto_register_shashes(sha256_ssse3_algs,
-				ARRAY_SIZE(sha256_ssse3_algs));
-	return 0;
-}
+static bool sha256_ssse3_registered;
+static bool sha256_avx_registered;
+static bool sha256_avx2_registered;
+static bool sha256_ni_registered;
 
 static void unregister_sha256_ssse3(void)
 {
-	if (boot_cpu_has(X86_FEATURE_SSSE3))
+	if (sha256_ssse3_registered) {
 		crypto_unregister_shashes(sha256_ssse3_algs,
 				ARRAY_SIZE(sha256_ssse3_algs));
+		sha256_ssse3_registered = 0;
+	}
 }
 
 asmlinkage void sha256_transform_avx(struct sha256_state *state,
@@ -215,30 +214,13 @@ static struct shash_alg sha256_avx_algs[] = { {
 	}
 } };
 
-static bool avx_usable(void)
-{
-	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
-		if (boot_cpu_has(X86_FEATURE_AVX))
-			pr_info("AVX detected but unusable.\n");
-		return false;
-	}
-
-	return true;
-}
-
-static int register_sha256_avx(void)
-{
-	if (avx_usable())
-		return crypto_register_shashes(sha256_avx_algs,
-				ARRAY_SIZE(sha256_avx_algs));
-	return 0;
-}
-
 static void unregister_sha256_avx(void)
 {
-	if (avx_usable())
+	if (sha256_avx_registered) {
 		crypto_unregister_shashes(sha256_avx_algs,
 				ARRAY_SIZE(sha256_avx_algs));
+		sha256_avx_registered = 0;
+	}
 }
 
 asmlinkage void sha256_transform_rorx(struct sha256_state *state,
@@ -291,28 +273,13 @@ static struct shash_alg sha256_avx2_algs[] = { {
 	}
 } };
 
-static bool avx2_usable(void)
-{
-	if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) &&
-		    boot_cpu_has(X86_FEATURE_BMI2))
-		return true;
-
-	return false;
-}
-
-static int register_sha256_avx2(void)
-{
-	if (avx2_usable())
-		return crypto_register_shashes(sha256_avx2_algs,
-				ARRAY_SIZE(sha256_avx2_algs));
-	return 0;
-}
-
 static void unregister_sha256_avx2(void)
 {
-	if (avx2_usable())
+	if (sha256_avx2_registered) {
 		crypto_unregister_shashes(sha256_avx2_algs,
 				ARRAY_SIZE(sha256_avx2_algs));
+		sha256_avx2_registered = 0;
+	}
 }
 
 #ifdef CONFIG_AS_SHA256_NI
@@ -375,55 +342,92 @@ static const struct x86_cpu_id module_cpu_ids[] = {
 };
 MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
 
-static int register_sha256_ni(void)
-{
-	if (boot_cpu_has(X86_FEATURE_SHA_NI))
-		return crypto_register_shashes(sha256_ni_algs,
-				ARRAY_SIZE(sha256_ni_algs));
-	return 0;
-}
-
 static void unregister_sha256_ni(void)
 {
-	if (boot_cpu_has(X86_FEATURE_SHA_NI))
+	if (sha256_ni_registered) {
 		crypto_unregister_shashes(sha256_ni_algs,
 				ARRAY_SIZE(sha256_ni_algs));
+		sha256_ni_registered = 0;
+	}
 }
 
 #else
-static inline int register_sha256_ni(void) { return 0; }
 static inline void unregister_sha256_ni(void) { }
 #endif
 
 static int __init sha256_ssse3_mod_init(void)
 {
-	if (!x86_match_cpu(module_cpu_ids))
+	const char *feature_name;
+	const char *driver_name = NULL;
+	const char *driver_name2 = NULL;
+	int ret;
+
+	if (!x86_match_cpu(module_cpu_ids)) {
+		pr_info("CPU-optimized crypto module not loaded, required CPU features (SSSE3, AVX, AVX2, or SHA-NI) not supported\n");
 		return -ENODEV;
+	}
 
-	if (register_sha256_ssse3())
-		goto fail;
+	/* SHA-NI */
+	if (boot_cpu_has(X86_FEATURE_SHA_NI)) {
 
-	if (register_sha256_avx()) {
-		unregister_sha256_ssse3();
-		goto fail;
-	}
+		ret = crypto_register_shashes(sha256_ni_algs,
+						ARRAY_SIZE(sha256_ni_algs));
+		if (!ret) {
+			sha256_ni_registered = 1;
+			driver_name = sha256_ni_algs[0].base.cra_driver_name;
+			driver_name2 = sha256_ni_algs[1].base.cra_driver_name;
+		}
 
-	if (register_sha256_avx2()) {
-		unregister_sha256_avx();
-		unregister_sha256_ssse3();
-		goto fail;
-	}
+	/* AVX2 */
+	} else if (boot_cpu_has(X86_FEATURE_AVX2)) {
+
+		if (boot_cpu_has(X86_FEATURE_BMI2)) {
+			ret = crypto_register_shashes(sha256_avx2_algs,
+						ARRAY_SIZE(sha256_avx2_algs));
+			if (!ret) {
+				sha256_avx2_registered = 1;
+				driver_name = sha256_avx2_algs[0].base.cra_driver_name;
+				driver_name2 = sha256_avx2_algs[1].base.cra_driver_name;
+			}
+		} else {
+			pr_info("AVX2-optimized version not engaged, all required CPU features (AVX2, BMI2) not supported\n");
+		}
 
-	if (register_sha256_ni()) {
-		unregister_sha256_avx2();
-		unregister_sha256_avx();
-		unregister_sha256_ssse3();
-		goto fail;
+	/* AVX */
+	} else if (boot_cpu_has(X86_FEATURE_AVX)) {
+
+		if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+			       &feature_name)) {
+			ret = crypto_register_shashes(sha256_avx_algs,
+						ARRAY_SIZE(sha256_avx_algs));
+			if (!ret) {
+				sha256_avx_registered = 1;
+				driver_name = sha256_avx_algs[0].base.cra_driver_name;
+				driver_name2 = sha256_avx_algs[1].base.cra_driver_name;
+			}
+		} else {
+			pr_info("AVX-optimized version not engaged, CPU extended feature '%s' is not supported\n",
+				feature_name);
+		}
+
+	/* SSE3 */
+	} else if (boot_cpu_has(X86_FEATURE_SSSE3)) {
+		ret = crypto_register_shashes(sha256_ssse3_algs,
+					      ARRAY_SIZE(sha256_ssse3_algs));
+		if (!ret) {
+			sha256_ssse3_registered = 1;
+			driver_name = sha256_ssse3_algs[0].base.cra_driver_name;
+			driver_name2 = sha256_ssse3_algs[1].base.cra_driver_name;
+		}
 	}
 
+	pr_info("CPU-optimized crypto module loaded (SSSE3=%s, AVX=%s, AVX2=%s, SHA-NI=%s): drivers=%s, %s\n",
+		sha256_ssse3_registered ? "yes" : "no",
+		sha256_avx_registered ? "yes" : "no",
+		sha256_avx2_registered ? "yes" : "no",
+		sha256_ni_registered ? "yes" : "no",
+		driver_name, driver_name2);
 	return 0;
-fail:
-	return -ENODEV;
 }
 
 static void __exit sha256_ssse3_mod_fini(void)
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index fd5075a32613..df9f8207cc79 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -149,33 +149,21 @@ static struct shash_alg sha512_ssse3_algs[] = { {
 	}
 } };
 
-static int register_sha512_ssse3(void)
-{
-	if (boot_cpu_has(X86_FEATURE_SSSE3))
-		return crypto_register_shashes(sha512_ssse3_algs,
-			ARRAY_SIZE(sha512_ssse3_algs));
-	return 0;
-}
+static bool sha512_ssse3_registered;
+static bool sha512_avx_registered;
+static bool sha512_avx2_registered;
 
 static void unregister_sha512_ssse3(void)
 {
-	if (boot_cpu_has(X86_FEATURE_SSSE3))
+	if (sha512_ssse3_registered) {
 		crypto_unregister_shashes(sha512_ssse3_algs,
 			ARRAY_SIZE(sha512_ssse3_algs));
+		sha512_ssse3_registered = 0;
+	}
 }
 
 asmlinkage void sha512_transform_avx(struct sha512_state *state,
 				     const u8 *data, int blocks);
-static bool avx_usable(void)
-{
-	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
-		if (boot_cpu_has(X86_FEATURE_AVX))
-			pr_info("AVX detected but unusable.\n");
-		return false;
-	}
-
-	return true;
-}
 
 static int sha512_avx_update(struct shash_desc *desc, const u8 *data,
 		       unsigned int len)
@@ -225,19 +213,13 @@ static struct shash_alg sha512_avx_algs[] = { {
 	}
 } };
 
-static int register_sha512_avx(void)
-{
-	if (avx_usable())
-		return crypto_register_shashes(sha512_avx_algs,
-			ARRAY_SIZE(sha512_avx_algs));
-	return 0;
-}
-
 static void unregister_sha512_avx(void)
 {
-	if (avx_usable())
+	if (sha512_avx_registered) {
 		crypto_unregister_shashes(sha512_avx_algs,
 			ARRAY_SIZE(sha512_avx_algs));
+		sha512_avx_registered = 0;
+	}
 }
 
 asmlinkage void sha512_transform_rorx(struct sha512_state *state,
@@ -291,22 +273,6 @@ static struct shash_alg sha512_avx2_algs[] = { {
 	}
 } };
 
-static bool avx2_usable(void)
-{
-	if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) &&
-		    boot_cpu_has(X86_FEATURE_BMI2))
-		return true;
-
-	return false;
-}
-
-static int register_sha512_avx2(void)
-{
-	if (avx2_usable())
-		return crypto_register_shashes(sha512_avx2_algs,
-			ARRAY_SIZE(sha512_avx2_algs));
-	return 0;
-}
 static const struct x86_cpu_id module_cpu_ids[] = {
 	X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
 	X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
@@ -317,33 +283,73 @@ MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
 
 static void unregister_sha512_avx2(void)
 {
-	if (avx2_usable())
+	if (sha512_avx2_registered) {
 		crypto_unregister_shashes(sha512_avx2_algs,
 			ARRAY_SIZE(sha512_avx2_algs));
+		sha512_avx2_registered = 0;
+	}
 }
 
 static int __init sha512_ssse3_mod_init(void)
 {
-	if (!x86_match_cpu(module_cpu_ids))
+	const char *feature_name;
+	const char *driver_name = NULL;
+	const char *driver_name2 = NULL;
+	int ret;
+
+	if (!x86_match_cpu(module_cpu_ids)) {
+		pr_info("CPU-optimized crypto module not loaded, required CPU features (SSSE3, AVX, or AVX2) not supported\n");
 		return -ENODEV;
+	}
 
-	if (register_sha512_ssse3())
-		goto fail;
+	/* AVX2 */
+	if (boot_cpu_has(X86_FEATURE_AVX2)) {
+		if (boot_cpu_has(X86_FEATURE_BMI2)) {
+			ret = crypto_register_shashes(sha512_avx2_algs,
+					ARRAY_SIZE(sha512_avx2_algs));
+			if (!ret) {
+				sha512_avx2_registered = 1;
+				driver_name = sha512_avx2_algs[0].base.cra_driver_name;
+				driver_name2 = sha512_avx2_algs[1].base.cra_driver_name;
+			}
+		} else {
+			pr_info("AVX2-optimized version not engaged, all required CPU features (AVX2, BMI2) not supported\n");
+		}
 
-	if (register_sha512_avx()) {
-		unregister_sha512_ssse3();
-		goto fail;
-	}
+	/* AVX */
+	} else if (boot_cpu_has(X86_FEATURE_AVX)) {
+
+		if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+				       &feature_name)) {
+			ret = crypto_register_shashes(sha512_avx_algs,
+					ARRAY_SIZE(sha512_avx_algs));
+			if (!ret) {
+				sha512_avx_registered = 1;
+				driver_name = sha512_avx_algs[0].base.cra_driver_name;
+				driver_name2 = sha512_avx_algs[1].base.cra_driver_name;
+			}
+		} else {
+			pr_info("AVX-optimized version not engaged, CPU extended feature '%s' is not supported\n",
+				feature_name);
+		}
 
-	if (register_sha512_avx2()) {
-		unregister_sha512_avx();
-		unregister_sha512_ssse3();
-		goto fail;
+	/* SSE3 */
+	} else if (boot_cpu_has(X86_FEATURE_SSSE3)) {
+		ret = crypto_register_shashes(sha512_ssse3_algs,
+					ARRAY_SIZE(sha512_ssse3_algs));
+		if (!ret) {
+			sha512_ssse3_registered = 1;
+			driver_name = sha512_ssse3_algs[0].base.cra_driver_name;
+			driver_name2 = sha512_ssse3_algs[1].base.cra_driver_name;
+		}
 	}
 
+	pr_info("CPU-optimized crypto module loaded (SSSE3=%s, AVX=%s, AVX2=%s): drivers=%s, %s\n",
+		sha512_ssse3_registered ? "yes" : "no",
+		sha512_avx_registered ? "yes" : "no",
+		sha512_avx2_registered ? "yes" : "no",
+		driver_name, driver_name2);
 	return 0;
-fail:
-	return -ENODEV;
 }
 
 static void __exit sha512_ssse3_mod_fini(void)
-- 
2.37.3

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ