Message-ID: <20250426065041.1551914-12-ebiggers@kernel.org>
Date: Fri, 25 Apr 2025 23:50:37 -0700
From: Eric Biggers <ebiggers@...nel.org>
To: linux-crypto@...r.kernel.org
Cc: linux-kernel@...r.kernel.org,
	linux-arch@...r.kernel.org,
	linux-arm-kernel@...ts.infradead.org,
	linux-mips@...r.kernel.org,
	linuxppc-dev@...ts.ozlabs.org,
	linux-riscv@...ts.infradead.org,
	sparclinux@...r.kernel.org,
	linux-s390@...r.kernel.org,
	x86@...nel.org,
	Ard Biesheuvel <ardb@...nel.org>,
	"Jason A . Donenfeld " <Jason@...c4.com>,
	Linus Torvalds <torvalds@...ux-foundation.org>
Subject: [PATCH 11/13] crypto: x86/sha256 - implement library instead of shash

From: Eric Biggers <ebiggers@...gle.com>

Instead of providing crypto_shash algorithms for the arch-optimized
SHA-256 code, implement the SHA-256 library.  This is much simpler, it
makes the SHA-256 library functions arch-optimized, and it fixes the
longstanding issue where the arch-optimized SHA-256 code was disabled
by default.  SHA-256 remains available through crypto_shash, but
individual architectures no longer need to handle it.
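
As an illustrative sketch (not part of this patch), a library user now
gets the arch-optimized code with no algorithm registration or tfm
allocation at all, using the existing one-shot helper from
<crypto/sha2.h>:

	u8 digest[SHA256_DIGEST_SIZE];

	/* Internally reaches sha256_blocks_arch() on x86_64. */
	sha256(data, len, digest);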

To match sha256_blocks_arch(), change the type of the nblocks parameter
of the assembly functions from int to size_t.  The assembly functions
actually already treated it as size_t.
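
With this change the prototypes become (shown for the SSSE3 variant;
the AVX, AVX2, and SHA-NI variants follow the same pattern):

	asmlinkage void sha256_transform_ssse3(u32 state[SHA256_STATE_WORDS],
					       const u8 *data, size_t nblocks);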

Signed-off-by: Eric Biggers <ebiggers@...gle.com>
---
 arch/x86/crypto/Kconfig                       |  14 -
 arch/x86/crypto/Makefile                      |   3 -
 arch/x86/crypto/sha256_ssse3_glue.c           | 432 ------------------
 arch/x86/lib/crypto/Kconfig                   |   7 +
 arch/x86/lib/crypto/Makefile                  |   3 +
 arch/x86/{ => lib}/crypto/sha256-avx-asm.S    |  12 +-
 arch/x86/{ => lib}/crypto/sha256-avx2-asm.S   |  12 +-
 .../crypto/sha256-ni-asm.S}                   |  36 +-
 arch/x86/{ => lib}/crypto/sha256-ssse3-asm.S  |  14 +-
 arch/x86/lib/crypto/sha256.c                  |  74 +++
 10 files changed, 118 insertions(+), 489 deletions(-)
 delete mode 100644 arch/x86/crypto/sha256_ssse3_glue.c
 rename arch/x86/{ => lib}/crypto/sha256-avx-asm.S (98%)
 rename arch/x86/{ => lib}/crypto/sha256-avx2-asm.S (98%)
 rename arch/x86/{crypto/sha256_ni_asm.S => lib/crypto/sha256-ni-asm.S} (85%)
 rename arch/x86/{ => lib}/crypto/sha256-ssse3-asm.S (98%)
 create mode 100644 arch/x86/lib/crypto/sha256.c

diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig
index 9e941362e4cd5..56cfdc79e2c66 100644
--- a/arch/x86/crypto/Kconfig
+++ b/arch/x86/crypto/Kconfig
@@ -388,24 +388,10 @@ config CRYPTO_SHA1_SSSE3
 	  - SSSE3 (Supplemental SSE3)
 	  - AVX (Advanced Vector Extensions)
 	  - AVX2 (Advanced Vector Extensions 2)
 	  - SHA-NI (SHA Extensions New Instructions)
 
-config CRYPTO_SHA256_SSSE3
-	tristate "Hash functions: SHA-224 and SHA-256 (SSSE3/AVX/AVX2/SHA-NI)"
-	depends on 64BIT
-	select CRYPTO_SHA256
-	select CRYPTO_HASH
-	help
-	  SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
-
-	  Architecture: x86_64 using:
-	  - SSSE3 (Supplemental SSE3)
-	  - AVX (Advanced Vector Extensions)
-	  - AVX2 (Advanced Vector Extensions 2)
-	  - SHA-NI (SHA Extensions New Instructions)
-
 config CRYPTO_SHA512_SSSE3
 	tristate "Hash functions: SHA-384 and SHA-512 (SSSE3/AVX/AVX2)"
 	depends on 64BIT
 	select CRYPTO_SHA512
 	select CRYPTO_HASH
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index fad59a6c6c26f..aa289a9e0153b 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -52,13 +52,10 @@ aesni-intel-$(CONFIG_64BIT) += aes-gcm-avx10-x86_64.o
 endif
 
 obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
 sha1-ssse3-y := sha1_avx2_x86_64_asm.o sha1_ssse3_asm.o sha1_ni_asm.o sha1_ssse3_glue.o
 
-obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
-sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ni_asm.o sha256_ssse3_glue.o
-
 obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
 sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
 
 obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
deleted file mode 100644
index a5d3be00550b8..0000000000000
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ /dev/null
@@ -1,432 +0,0 @@
-/*
- * Cryptographic API.
- *
- * Glue code for the SHA256 Secure Hash Algorithm assembler implementations
- * using SSSE3, AVX, AVX2, and SHA-NI instructions.
- *
- * This file is based on sha256_generic.c
- *
- * Copyright (C) 2013 Intel Corporation.
- *
- * Author:
- *     Tim Chen <tim.c.chen@...ux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-
-#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
-
-#include <asm/cpu_device_id.h>
-#include <asm/fpu/api.h>
-#include <crypto/internal/hash.h>
-#include <crypto/sha2.h>
-#include <crypto/sha256_base.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-asmlinkage void sha256_transform_ssse3(struct crypto_sha256_state *state,
-				       const u8 *data, int blocks);
-
-static const struct x86_cpu_id module_cpu_ids[] = {
-	X86_MATCH_FEATURE(X86_FEATURE_SHA_NI, NULL),
-	X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
-	X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
-	X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL),
-	{}
-};
-MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
-
-static int _sha256_update(struct shash_desc *desc, const u8 *data,
-			  unsigned int len,
-			  sha256_block_fn *sha256_xform)
-{
-	int remain;
-
-	/*
-	 * Make sure struct crypto_sha256_state begins directly with the SHA256
-	 * 256-bit internal state, as this is what the asm functions expect.
-	 */
-	BUILD_BUG_ON(offsetof(struct crypto_sha256_state, state) != 0);
-
-	kernel_fpu_begin();
-	remain = sha256_base_do_update_blocks(desc, data, len, sha256_xform);
-	kernel_fpu_end();
-
-	return remain;
-}
-
-static int sha256_finup(struct shash_desc *desc, const u8 *data,
-	      unsigned int len, u8 *out, sha256_block_fn *sha256_xform)
-{
-	kernel_fpu_begin();
-	sha256_base_do_finup(desc, data, len, sha256_xform);
-	kernel_fpu_end();
-
-	return sha256_base_finish(desc, out);
-}
-
-static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int len)
-{
-	return _sha256_update(desc, data, len, sha256_transform_ssse3);
-}
-
-static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data,
-	      unsigned int len, u8 *out)
-{
-	return sha256_finup(desc, data, len, out, sha256_transform_ssse3);
-}
-
-static int sha256_ssse3_digest(struct shash_desc *desc, const u8 *data,
-	      unsigned int len, u8 *out)
-{
-	return sha256_base_init(desc) ?:
-	       sha256_ssse3_finup(desc, data, len, out);
-}
-
-static struct shash_alg sha256_ssse3_algs[] = { {
-	.digestsize	=	SHA256_DIGEST_SIZE,
-	.init		=	sha256_base_init,
-	.update		=	sha256_ssse3_update,
-	.finup		=	sha256_ssse3_finup,
-	.digest		=	sha256_ssse3_digest,
-	.descsize	=	sizeof(struct crypto_sha256_state),
-	.base		=	{
-		.cra_name	=	"sha256",
-		.cra_driver_name =	"sha256-ssse3",
-		.cra_priority	=	150,
-		.cra_flags	=	CRYPTO_AHASH_ALG_BLOCK_ONLY |
-					CRYPTO_AHASH_ALG_FINUP_MAX,
-		.cra_blocksize	=	SHA256_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-}, {
-	.digestsize	=	SHA224_DIGEST_SIZE,
-	.init		=	sha224_base_init,
-	.update		=	sha256_ssse3_update,
-	.finup		=	sha256_ssse3_finup,
-	.descsize	=	sizeof(struct crypto_sha256_state),
-	.base		=	{
-		.cra_name	=	"sha224",
-		.cra_driver_name =	"sha224-ssse3",
-		.cra_priority	=	150,
-		.cra_flags	=	CRYPTO_AHASH_ALG_BLOCK_ONLY |
-					CRYPTO_AHASH_ALG_FINUP_MAX,
-		.cra_blocksize	=	SHA224_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-} };
-
-static int register_sha256_ssse3(void)
-{
-	if (boot_cpu_has(X86_FEATURE_SSSE3))
-		return crypto_register_shashes(sha256_ssse3_algs,
-				ARRAY_SIZE(sha256_ssse3_algs));
-	return 0;
-}
-
-static void unregister_sha256_ssse3(void)
-{
-	if (boot_cpu_has(X86_FEATURE_SSSE3))
-		crypto_unregister_shashes(sha256_ssse3_algs,
-				ARRAY_SIZE(sha256_ssse3_algs));
-}
-
-asmlinkage void sha256_transform_avx(struct crypto_sha256_state *state,
-				     const u8 *data, int blocks);
-
-static int sha256_avx_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int len)
-{
-	return _sha256_update(desc, data, len, sha256_transform_avx);
-}
-
-static int sha256_avx_finup(struct shash_desc *desc, const u8 *data,
-		      unsigned int len, u8 *out)
-{
-	return sha256_finup(desc, data, len, out, sha256_transform_avx);
-}
-
-static int sha256_avx_digest(struct shash_desc *desc, const u8 *data,
-		      unsigned int len, u8 *out)
-{
-	return sha256_base_init(desc) ?:
-	       sha256_avx_finup(desc, data, len, out);
-}
-
-static struct shash_alg sha256_avx_algs[] = { {
-	.digestsize	=	SHA256_DIGEST_SIZE,
-	.init		=	sha256_base_init,
-	.update		=	sha256_avx_update,
-	.finup		=	sha256_avx_finup,
-	.digest		=	sha256_avx_digest,
-	.descsize	=	sizeof(struct crypto_sha256_state),
-	.base		=	{
-		.cra_name	=	"sha256",
-		.cra_driver_name =	"sha256-avx",
-		.cra_priority	=	160,
-		.cra_flags	=	CRYPTO_AHASH_ALG_BLOCK_ONLY |
-					CRYPTO_AHASH_ALG_FINUP_MAX,
-		.cra_blocksize	=	SHA256_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-}, {
-	.digestsize	=	SHA224_DIGEST_SIZE,
-	.init		=	sha224_base_init,
-	.update		=	sha256_avx_update,
-	.finup		=	sha256_avx_finup,
-	.descsize	=	sizeof(struct crypto_sha256_state),
-	.base		=	{
-		.cra_name	=	"sha224",
-		.cra_driver_name =	"sha224-avx",
-		.cra_priority	=	160,
-		.cra_flags	=	CRYPTO_AHASH_ALG_BLOCK_ONLY |
-					CRYPTO_AHASH_ALG_FINUP_MAX,
-		.cra_blocksize	=	SHA224_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-} };
-
-static bool avx_usable(void)
-{
-	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
-		if (boot_cpu_has(X86_FEATURE_AVX))
-			pr_info("AVX detected but unusable.\n");
-		return false;
-	}
-
-	return true;
-}
-
-static int register_sha256_avx(void)
-{
-	if (avx_usable())
-		return crypto_register_shashes(sha256_avx_algs,
-				ARRAY_SIZE(sha256_avx_algs));
-	return 0;
-}
-
-static void unregister_sha256_avx(void)
-{
-	if (avx_usable())
-		crypto_unregister_shashes(sha256_avx_algs,
-				ARRAY_SIZE(sha256_avx_algs));
-}
-
-asmlinkage void sha256_transform_rorx(struct crypto_sha256_state *state,
-				      const u8 *data, int blocks);
-
-static int sha256_avx2_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int len)
-{
-	return _sha256_update(desc, data, len, sha256_transform_rorx);
-}
-
-static int sha256_avx2_finup(struct shash_desc *desc, const u8 *data,
-		      unsigned int len, u8 *out)
-{
-	return sha256_finup(desc, data, len, out, sha256_transform_rorx);
-}
-
-static int sha256_avx2_digest(struct shash_desc *desc, const u8 *data,
-		      unsigned int len, u8 *out)
-{
-	return sha256_base_init(desc) ?:
-	       sha256_avx2_finup(desc, data, len, out);
-}
-
-static struct shash_alg sha256_avx2_algs[] = { {
-	.digestsize	=	SHA256_DIGEST_SIZE,
-	.init		=	sha256_base_init,
-	.update		=	sha256_avx2_update,
-	.finup		=	sha256_avx2_finup,
-	.digest		=	sha256_avx2_digest,
-	.descsize	=	sizeof(struct crypto_sha256_state),
-	.base		=	{
-		.cra_name	=	"sha256",
-		.cra_driver_name =	"sha256-avx2",
-		.cra_priority	=	170,
-		.cra_flags	=	CRYPTO_AHASH_ALG_BLOCK_ONLY |
-					CRYPTO_AHASH_ALG_FINUP_MAX,
-		.cra_blocksize	=	SHA256_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-}, {
-	.digestsize	=	SHA224_DIGEST_SIZE,
-	.init		=	sha224_base_init,
-	.update		=	sha256_avx2_update,
-	.finup		=	sha256_avx2_finup,
-	.descsize	=	sizeof(struct crypto_sha256_state),
-	.base		=	{
-		.cra_name	=	"sha224",
-		.cra_driver_name =	"sha224-avx2",
-		.cra_priority	=	170,
-		.cra_flags	=	CRYPTO_AHASH_ALG_BLOCK_ONLY |
-					CRYPTO_AHASH_ALG_FINUP_MAX,
-		.cra_blocksize	=	SHA224_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-} };
-
-static bool avx2_usable(void)
-{
-	if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) &&
-		    boot_cpu_has(X86_FEATURE_BMI2))
-		return true;
-
-	return false;
-}
-
-static int register_sha256_avx2(void)
-{
-	if (avx2_usable())
-		return crypto_register_shashes(sha256_avx2_algs,
-				ARRAY_SIZE(sha256_avx2_algs));
-	return 0;
-}
-
-static void unregister_sha256_avx2(void)
-{
-	if (avx2_usable())
-		crypto_unregister_shashes(sha256_avx2_algs,
-				ARRAY_SIZE(sha256_avx2_algs));
-}
-
-asmlinkage void sha256_ni_transform(struct crypto_sha256_state *digest,
-				    const u8 *data, int rounds);
-
-static int sha256_ni_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int len)
-{
-	return _sha256_update(desc, data, len, sha256_ni_transform);
-}
-
-static int sha256_ni_finup(struct shash_desc *desc, const u8 *data,
-		      unsigned int len, u8 *out)
-{
-	return sha256_finup(desc, data, len, out, sha256_ni_transform);
-}
-
-static int sha256_ni_digest(struct shash_desc *desc, const u8 *data,
-		      unsigned int len, u8 *out)
-{
-	return sha256_base_init(desc) ?:
-	       sha256_ni_finup(desc, data, len, out);
-}
-
-static struct shash_alg sha256_ni_algs[] = { {
-	.digestsize	=	SHA256_DIGEST_SIZE,
-	.init		=	sha256_base_init,
-	.update		=	sha256_ni_update,
-	.finup		=	sha256_ni_finup,
-	.digest		=	sha256_ni_digest,
-	.descsize	=	sizeof(struct crypto_sha256_state),
-	.base		=	{
-		.cra_name	=	"sha256",
-		.cra_driver_name =	"sha256-ni",
-		.cra_priority	=	250,
-		.cra_flags	=	CRYPTO_AHASH_ALG_BLOCK_ONLY |
-					CRYPTO_AHASH_ALG_FINUP_MAX,
-		.cra_blocksize	=	SHA256_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-}, {
-	.digestsize	=	SHA224_DIGEST_SIZE,
-	.init		=	sha224_base_init,
-	.update		=	sha256_ni_update,
-	.finup		=	sha256_ni_finup,
-	.descsize	=	sizeof(struct crypto_sha256_state),
-	.base		=	{
-		.cra_name	=	"sha224",
-		.cra_driver_name =	"sha224-ni",
-		.cra_priority	=	250,
-		.cra_flags	=	CRYPTO_AHASH_ALG_BLOCK_ONLY |
-					CRYPTO_AHASH_ALG_FINUP_MAX,
-		.cra_blocksize	=	SHA224_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-} };
-
-static int register_sha256_ni(void)
-{
-	if (boot_cpu_has(X86_FEATURE_SHA_NI))
-		return crypto_register_shashes(sha256_ni_algs,
-				ARRAY_SIZE(sha256_ni_algs));
-	return 0;
-}
-
-static void unregister_sha256_ni(void)
-{
-	if (boot_cpu_has(X86_FEATURE_SHA_NI))
-		crypto_unregister_shashes(sha256_ni_algs,
-				ARRAY_SIZE(sha256_ni_algs));
-}
-
-static int __init sha256_ssse3_mod_init(void)
-{
-	if (!x86_match_cpu(module_cpu_ids))
-		return -ENODEV;
-
-	if (register_sha256_ssse3())
-		goto fail;
-
-	if (register_sha256_avx()) {
-		unregister_sha256_ssse3();
-		goto fail;
-	}
-
-	if (register_sha256_avx2()) {
-		unregister_sha256_avx();
-		unregister_sha256_ssse3();
-		goto fail;
-	}
-
-	if (register_sha256_ni()) {
-		unregister_sha256_avx2();
-		unregister_sha256_avx();
-		unregister_sha256_ssse3();
-		goto fail;
-	}
-
-	return 0;
-fail:
-	return -ENODEV;
-}
-
-static void __exit sha256_ssse3_mod_fini(void)
-{
-	unregister_sha256_ni();
-	unregister_sha256_avx2();
-	unregister_sha256_avx();
-	unregister_sha256_ssse3();
-}
-
-module_init(sha256_ssse3_mod_init);
-module_exit(sha256_ssse3_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated");
-
-MODULE_ALIAS_CRYPTO("sha256");
-MODULE_ALIAS_CRYPTO("sha256-ssse3");
-MODULE_ALIAS_CRYPTO("sha256-avx");
-MODULE_ALIAS_CRYPTO("sha256-avx2");
-MODULE_ALIAS_CRYPTO("sha224");
-MODULE_ALIAS_CRYPTO("sha224-ssse3");
-MODULE_ALIAS_CRYPTO("sha224-avx");
-MODULE_ALIAS_CRYPTO("sha224-avx2");
-MODULE_ALIAS_CRYPTO("sha256-ni");
-MODULE_ALIAS_CRYPTO("sha224-ni");
diff --git a/arch/x86/lib/crypto/Kconfig b/arch/x86/lib/crypto/Kconfig
index 546fe2afe0b51..e344579db3d85 100644
--- a/arch/x86/lib/crypto/Kconfig
+++ b/arch/x86/lib/crypto/Kconfig
@@ -22,5 +22,12 @@ config CRYPTO_CHACHA20_X86_64
 config CRYPTO_POLY1305_X86_64
 	tristate
 	depends on 64BIT
 	default CRYPTO_LIB_POLY1305
 	select CRYPTO_ARCH_HAVE_LIB_POLY1305
+
+config CRYPTO_SHA256_X86_64
+	tristate
+	depends on 64BIT
+	default CRYPTO_LIB_SHA256
+	select CRYPTO_ARCH_HAVE_LIB_SHA256
+	select CRYPTO_LIB_SHA256_GENERIC
diff --git a/arch/x86/lib/crypto/Makefile b/arch/x86/lib/crypto/Makefile
index c2ff8c5f1046e..abceca3d31c01 100644
--- a/arch/x86/lib/crypto/Makefile
+++ b/arch/x86/lib/crypto/Makefile
@@ -8,10 +8,13 @@ chacha-x86_64-y := chacha-avx2-x86_64.o chacha-ssse3-x86_64.o chacha-avx512vl-x8
 
 obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o
 poly1305-x86_64-y := poly1305-x86_64-cryptogams.o poly1305_glue.o
 targets += poly1305-x86_64-cryptogams.S
 
+obj-$(CONFIG_CRYPTO_SHA256_X86_64) += sha256-x86_64.o
+sha256-x86_64-y := sha256.o sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256-ni-asm.o
+
 quiet_cmd_perlasm = PERLASM $@
       cmd_perlasm = $(PERL) $< > $@
 
 $(obj)/%.S: $(src)/%.pl FORCE
 	$(call if_changed,perlasm)
diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/lib/crypto/sha256-avx-asm.S
similarity index 98%
rename from arch/x86/crypto/sha256-avx-asm.S
rename to arch/x86/lib/crypto/sha256-avx-asm.S
index 53de72bdd851e..0d7b2c3e45d9a 100644
--- a/arch/x86/crypto/sha256-avx-asm.S
+++ b/arch/x86/lib/crypto/sha256-avx-asm.S
@@ -46,11 +46,11 @@
 ########################################################################
 # This code schedules 1 block at a time, with 4 lanes per block
 ########################################################################
 
 #include <linux/linkage.h>
-#include <linux/cfi_types.h>
+#include <linux/objtool.h>
 
 ## assume buffers not aligned
 #define    VMOVDQ vmovdqu
 
 ################################ Define Macros
@@ -339,17 +339,17 @@ a = TMP_
         add     y0, h                   # h = h + S1 + CH + k + w + S0 + MAJ
         ROTATE_ARGS
 .endm
 
 ########################################################################
-## void sha256_transform_avx(state sha256_state *state, const u8 *data, int blocks)
-## arg 1 : pointer to state
-## arg 2 : pointer to input data
-## arg 3 : Num blocks
+## void sha256_transform_avx(u32 state[SHA256_STATE_WORDS],
+##			     const u8 *data, size_t nblocks);
 ########################################################################
 .text
-SYM_TYPED_FUNC_START(sha256_transform_avx)
+SYM_FUNC_START(sha256_transform_avx)
+	ANNOTATE_NOENDBR	# since this is called only via static_call
+
 	pushq   %rbx
 	pushq   %r12
 	pushq   %r13
 	pushq   %r14
 	pushq   %r15
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/lib/crypto/sha256-avx2-asm.S
similarity index 98%
rename from arch/x86/crypto/sha256-avx2-asm.S
rename to arch/x86/lib/crypto/sha256-avx2-asm.S
index 0bbec1c75cd0b..25d3380321ec3 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/lib/crypto/sha256-avx2-asm.S
@@ -47,11 +47,11 @@
 ########################################################################
 # This code schedules 2 blocks at a time, with 4 lanes per block
 ########################################################################
 
 #include <linux/linkage.h>
-#include <linux/cfi_types.h>
+#include <linux/objtool.h>
 
 ## assume buffers not aligned
 #define	VMOVDQ vmovdqu
 
 ################################ Define Macros
@@ -516,17 +516,17 @@ STACK_SIZE	= _CTX      + _CTX_SIZE
 	ROTATE_ARGS
 
 .endm
 
 ########################################################################
-## void sha256_transform_rorx(struct sha256_state *state, const u8 *data, int blocks)
-## arg 1 : pointer to state
-## arg 2 : pointer to input data
-## arg 3 : Num blocks
+## void sha256_transform_rorx(u32 state[SHA256_STATE_WORDS],
+##			      const u8 *data, size_t nblocks);
 ########################################################################
 .text
-SYM_TYPED_FUNC_START(sha256_transform_rorx)
+SYM_FUNC_START(sha256_transform_rorx)
+	ANNOTATE_NOENDBR	# since this is called only via static_call
+
 	pushq	%rbx
 	pushq	%r12
 	pushq	%r13
 	pushq	%r14
 	pushq	%r15
diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/lib/crypto/sha256-ni-asm.S
similarity index 85%
rename from arch/x86/crypto/sha256_ni_asm.S
rename to arch/x86/lib/crypto/sha256-ni-asm.S
index d515a55a3bc1d..d3548206cf3d4 100644
--- a/arch/x86/crypto/sha256_ni_asm.S
+++ b/arch/x86/lib/crypto/sha256-ni-asm.S
@@ -52,13 +52,13 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  */
 
 #include <linux/linkage.h>
-#include <linux/cfi_types.h>
+#include <linux/objtool.h>
 
-#define DIGEST_PTR	%rdi	/* 1st arg */
+#define STATE_PTR	%rdi	/* 1st arg */
 #define DATA_PTR	%rsi	/* 2nd arg */
 #define NUM_BLKS	%rdx	/* 3rd arg */
 
 #define SHA256CONSTANTS	%rax
 
@@ -96,40 +96,36 @@
 	sha256msg1	\m0, \m3
 .endif
 .endm
 
 /*
- * Intel SHA Extensions optimized implementation of a SHA-256 update function
+ * Intel SHA Extensions optimized implementation of a SHA-256 block function
  *
- * The function takes a pointer to the current hash values, a pointer to the
- * input data, and a number of 64 byte blocks to process.  Once all blocks have
- * been processed, the digest pointer is  updated with the resulting hash value.
- * The function only processes complete blocks, there is no functionality to
- * store partial blocks.  All message padding and hash value initialization must
- * be done outside the update function.
+ * This function takes a pointer to the current SHA-256 state, a pointer to the
+ * input data, and the number of 64-byte blocks to process.  Once all blocks
+ * have been processed, the state is updated with the new state.  This function
+ * only processes complete blocks.  State initialization, buffering of partial
+ * blocks, and digest finalization is expected to be handled elsewhere.
  *
- * void sha256_ni_transform(uint32_t *digest, const void *data,
-		uint32_t numBlocks);
- * digest : pointer to digest
- * data: pointer to input data
- * numBlocks: Number of blocks to process
+ * void sha256_ni_transform(u32 state[SHA256_STATE_WORDS],
+ *			    const u8 *data, size_t nblocks);
  */
-
 .text
-SYM_TYPED_FUNC_START(sha256_ni_transform)
+SYM_FUNC_START(sha256_ni_transform)
+	ANNOTATE_NOENDBR	# since this is called only via static_call
 
 	shl		$6, NUM_BLKS		/*  convert to bytes */
 	jz		.Ldone_hash
 	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */
 
 	/*
 	 * load initial hash values
 	 * Need to reorder these appropriately
 	 * DCBA, HGFE -> ABEF, CDGH
 	 */
-	movdqu		0*16(DIGEST_PTR), STATE0	/* DCBA */
-	movdqu		1*16(DIGEST_PTR), STATE1	/* HGFE */
+	movdqu		0*16(STATE_PTR), STATE0		/* DCBA */
+	movdqu		1*16(STATE_PTR), STATE1		/* HGFE */
 
 	movdqa		STATE0, TMP
 	punpcklqdq	STATE1, STATE0			/* FEBA */
 	punpckhqdq	TMP, STATE1			/* DCHG */
 	pshufd		$0x1B, STATE0, STATE0		/* ABEF */
@@ -164,12 +160,12 @@ SYM_TYPED_FUNC_START(sha256_ni_transform)
 	punpcklqdq	STATE1, STATE0			/* GHEF */
 	punpckhqdq	TMP, STATE1			/* ABCD */
 	pshufd		$0xB1, STATE0, STATE0		/* HGFE */
 	pshufd		$0x1B, STATE1, STATE1		/* DCBA */
 
-	movdqu		STATE1, 0*16(DIGEST_PTR)
-	movdqu		STATE0, 1*16(DIGEST_PTR)
+	movdqu		STATE1, 0*16(STATE_PTR)
+	movdqu		STATE0, 1*16(STATE_PTR)
 
 .Ldone_hash:
 
 	RET
 SYM_FUNC_END(sha256_ni_transform)
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/lib/crypto/sha256-ssse3-asm.S
similarity index 98%
rename from arch/x86/crypto/sha256-ssse3-asm.S
rename to arch/x86/lib/crypto/sha256-ssse3-asm.S
index 93264ee445432..7f24a4cdcb257 100644
--- a/arch/x86/crypto/sha256-ssse3-asm.S
+++ b/arch/x86/lib/crypto/sha256-ssse3-asm.S
@@ -45,11 +45,11 @@
 # and search for that title.
 #
 ########################################################################
 
 #include <linux/linkage.h>
-#include <linux/cfi_types.h>
+#include <linux/objtool.h>
 
 ## assume buffers not aligned
 #define    MOVDQ movdqu
 
 ################################ Define Macros
@@ -346,19 +346,17 @@ a = TMP_
 	add     y0, h		      # h = h + S1 + CH + k + w + S0 + MAJ
 	ROTATE_ARGS
 .endm
 
 ########################################################################
-## void sha256_transform_ssse3(struct sha256_state *state, const u8 *data,
-##			       int blocks);
-## arg 1 : pointer to state
-##	   (struct sha256_state is assumed to begin with u32 state[8])
-## arg 2 : pointer to input data
-## arg 3 : Num blocks
+## void sha256_transform_ssse3(u32 state[SHA256_STATE_WORDS],
+##			       const u8 *data, size_t nblocks);
 ########################################################################
 .text
-SYM_TYPED_FUNC_START(sha256_transform_ssse3)
+SYM_FUNC_START(sha256_transform_ssse3)
+	ANNOTATE_NOENDBR	# since this is called only via static_call
+
 	pushq   %rbx
 	pushq   %r12
 	pushq   %r13
 	pushq   %r14
 	pushq   %r15
diff --git a/arch/x86/lib/crypto/sha256.c b/arch/x86/lib/crypto/sha256.c
new file mode 100644
index 0000000000000..47865b5cd94be
--- /dev/null
+++ b/arch/x86/lib/crypto/sha256.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SHA-256 optimized for x86_64
+ *
+ * Copyright 2025 Google LLC
+ */
+#include <asm/fpu/api.h>
+#include <crypto/internal/sha2.h>
+#include <crypto/internal/simd.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/static_call.h>
+
+asmlinkage void sha256_transform_ssse3(u32 state[SHA256_STATE_WORDS],
+				       const u8 *data, size_t nblocks);
+asmlinkage void sha256_transform_avx(u32 state[SHA256_STATE_WORDS],
+				     const u8 *data, size_t nblocks);
+asmlinkage void sha256_transform_rorx(u32 state[SHA256_STATE_WORDS],
+				      const u8 *data, size_t nblocks);
+asmlinkage void sha256_ni_transform(u32 state[SHA256_STATE_WORDS],
+				    const u8 *data, size_t nblocks);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha256_x86);
+
+DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_transform_ssse3);
+
+void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+			const u8 *data, size_t nblocks)
+{
+	if (static_branch_likely(&have_sha256_x86) && crypto_simd_usable()) {
+		kernel_fpu_begin();
+		static_call(sha256_blocks_x86)(state, data, nblocks);
+		kernel_fpu_end();
+	} else {
+		sha256_blocks_generic(state, data, nblocks);
+	}
+}
+EXPORT_SYMBOL(sha256_blocks_arch);
+
+bool sha256_is_arch_optimized(void)
+{
+	return static_key_enabled(&have_sha256_x86);
+}
+EXPORT_SYMBOL(sha256_is_arch_optimized);
+
+static int __init sha256_x86_mod_init(void)
+{
+	if (boot_cpu_has(X86_FEATURE_SHA_NI)) {
+		static_call_update(sha256_blocks_x86, sha256_ni_transform);
+	} else if (cpu_has_xfeatures(XFEATURE_MASK_SSE |
+				     XFEATURE_MASK_YMM, NULL) &&
+		   boot_cpu_has(X86_FEATURE_AVX)) {
+		if (boot_cpu_has(X86_FEATURE_AVX2) &&
+		    boot_cpu_has(X86_FEATURE_BMI2))
+			static_call_update(sha256_blocks_x86,
+					   sha256_transform_rorx);
+		else
+			static_call_update(sha256_blocks_x86,
+					   sha256_transform_avx);
+	} else if (!boot_cpu_has(X86_FEATURE_SSSE3)) {
+		return 0;
+	}
+	static_branch_enable(&have_sha256_x86);
+	return 0;
+}
+arch_initcall(sha256_x86_mod_init);
+
+static void __exit sha256_x86_mod_exit(void)
+{
+}
+module_exit(sha256_x86_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-256 optimized for x86_64");
-- 
2.49.0

