Message-ID: <20250612025516.368-1-AlanSong-oc@zhaoxin.com>
Date: Thu, 12 Jun 2025 10:55:18 +0800
From: AlanSong-oc <AlanSong-oc@...oxin.com>
To: <herbert@...dor.apana.org.au>, <davem@...emloft.net>,
<linux-crypto@...r.kernel.org>, <linux-kernel@...r.kernel.org>
CC: <CobeChen@...oxin.com>, <TonyWWang-oc@...oxin.com>, <YunShen@...oxin.com>,
<GeorgeXue@...oxin.com>, <LeoLiu-oc@...oxin.com>, <HansHu@...oxin.com>,
AlanSong-oc <AlanSong-oc@...oxin.com>
Subject: [PATCH RESEND] crypto: padlock-sha - Add support for Zhaoxin processor

On Zhaoxin processors, the XSHA1 instruction requires that the memory region
pointed to by the %rdi register be 32 bytes in size, and the XSHA1 and XSHA256
instructions perform no operation when %ecx is zero.

Because of these requirements, the current padlock-sha driver does not work
correctly on Zhaoxin processors: it fails the self-tests and is therefore
never activated. This issue has been reported in Debian [1]. The self-tests
fail with the following messages [2]:

alg: shash: sha1-padlock-nano test failed (wrong result) on test vector 0, cfg="init+update+final aligned buffer"
alg: self-tests for sha1 using sha1-padlock-nano failed (rc=-22)
------------[ cut here ]------------
alg: shash: sha256-padlock-nano test failed (wrong result) on test vector 0, cfg="init+update+final aligned buffer"
alg: self-tests for sha256 using sha256-padlock-nano failed (rc=-22)
------------[ cut here ]------------

This patch introduces new functions and data structures to properly meet the
requirements of the XSHA1 and XSHA256 instructions on Zhaoxin processors.

[1] https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1103397
[2] https://linux-hardware.org/?probe=271fabb7a4&log=dmesg
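
For illustration only (this snippet is not part of the patch), below is a
minimal user-space sketch of the final-block layout built for the count == 0
path; the pad_block() helper name, the main() driver, and the empty-message
length used here are assumptions local to this example:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Mirror of the padding scheme used by the patch: zero the block, place the
 * 0x80 terminator byte first, and store the 64-bit message length in bits
 * big-endian in the last eight bytes.
 */
static void pad_block(uint8_t *block, size_t block_size, uint64_t bit_len)
{
	memset(block, 0, block_size);
	block[0] = 0x80;
	for (int i = 0; i < 8; i++)
		block[block_size - 1 - i] = (bit_len >> (i * 8)) & 0xFF;
}

int main(void)
{
	uint8_t block[64];	/* SHA-1/SHA-256 block size is 64 bytes */

	/* Empty message: bit length 0, so only the leading 0x80 is non-zero. */
	pad_block(block, sizeof(block), 0);

	for (size_t i = 0; i < sizeof(block); i++)
		printf("%02x%c", block[i], (i % 16 == 15) ? '\n' : ' ');
	return 0;
}

For a non-empty message whose length is a multiple of the block size, the
last eight bytes of such a block would carry the big-endian bit length.
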
Signed-off-by: AlanSong-oc <AlanSong-oc@...oxin.com>
---
drivers/crypto/padlock-sha.c | 169 ++++++++++++++++++++++++++++++++---
1 file changed, 157 insertions(+), 12 deletions(-)

diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c
index 329f60ad4..f980e08f6 100644
--- a/drivers/crypto/padlock-sha.c
+++ b/drivers/crypto/padlock-sha.c
@@ -99,6 +99,14 @@ static inline void padlock_output_block(uint32_t *src,
*dst++ = swab32(*src++);
}
+static inline void padlock_pad_block_zhaoxin(u8 *padded_data, size_t block_size, u64 bit_len)
+{
+ memset(padded_data, 0, block_size);
+ padded_data[0] = 0x80;
+ for (int i = 0; i < 8 && bit_len; i++)
+ padded_data[block_size - 1 - i] = (bit_len >> (i * 8)) & 0xFF;
+}
+
static int padlock_sha_finup(struct shash_desc *desc, const u8 *in,
unsigned int count, u8 *out)
{
@@ -133,6 +141,37 @@ static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
return 0;
}
+static int padlock_sha1_finup_zhaoxin(struct shash_desc *desc, const u8 *in,
+ unsigned int count, u8 *out)
+{
+ struct sha1_state *state = padlock_shash_desc_ctx(desc);
+ u64 start = state->count;
+
+ if (start + count > ULONG_MAX)
+ return padlock_sha_finup(desc, in, count, out);
+
+ if (count == 0) {
+ u8 buf[SHA1_BLOCK_SIZE + PADLOCK_ALIGNMENT - 1];
+ u8 *padded_data = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
+ u64 bit_len = (start + count) * 8;
+
+ padlock_pad_block_zhaoxin(padded_data, SHA1_BLOCK_SIZE, bit_len);
+
+ asm volatile(".byte 0xf3,0x0f,0xa6,0xc8"
+ : "+S"(padded_data), "+D"(state)
+ : "a"((long)-1), "c"(1UL));
+ } else {
+ /* Process the input data in bytes, applying necessary padding */
+ asm volatile(".byte 0xf3,0x0f,0xa6,0xc8"
+ :
+ : "c"((unsigned long)start + count), "a"((unsigned long)start),
+ "S"(in), "D"(state));
+ }
+
+ padlock_output_block(state->state, (uint32_t *)out, 5);
+ return 0;
+}
+
static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
unsigned int count, u8 *out)
{
@@ -155,6 +194,37 @@ static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
return 0;
}
+static int padlock_sha256_finup_zhaoxin(struct shash_desc *desc, const u8 *in,
+ unsigned int count, u8 *out)
+{
+ struct sha256_state *state = padlock_shash_desc_ctx(desc);
+ u64 start = state->count;
+
+ if (start + count > ULONG_MAX)
+ return padlock_sha_finup(desc, in, count, out);
+
+ if (count == 0) {
+ u8 buf[SHA256_BLOCK_SIZE + PADLOCK_ALIGNMENT - 1];
+ u8 *padded_data = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
+ u64 bit_len = (start + count) * 8;
+
+ padlock_pad_block_zhaoxin(padded_data, SHA256_BLOCK_SIZE, bit_len);
+
+ asm volatile(".byte 0xf3,0x0f,0xa6,0xd0"
+ : "+S"(padded_data), "+D"(state)
+ : "a"((long)-1), "c"(1UL));
+ } else {
+ /* Process the input data in bytes, applying necessary padding */
+ asm volatile(".byte 0xf3,0x0f,0xa6,0xd0"
+ :
+ : "c"((unsigned long)start + count), "a"((unsigned long)start),
+ "S"(in), "D"(state));
+ }
+
+ padlock_output_block(state->state, (uint32_t *)out, 8);
+ return 0;
+}
+
static int padlock_init_tfm(struct crypto_shash *hash)
{
const char *fallback_driver_name = crypto_shash_alg_name(hash);
@@ -258,6 +328,31 @@ static int padlock_sha1_update_nano(struct shash_desc *desc,
return len;
}
+static int padlock_sha1_update_zhaoxin(struct shash_desc *desc,
+ const u8 *src, unsigned int len)
+{
+ struct sha1_state *state = padlock_shash_desc_ctx(desc);
+ int blocks = len / SHA1_BLOCK_SIZE;
+
+ /* The XSHA1 instruction requires a 32-byte state buffer on Zhaoxin processors */
+ u8 buf[32 + PADLOCK_ALIGNMENT - 1];
+ u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
+
+ memcpy(dst, (u8 *)(state->state), SHA1_DIGEST_SIZE);
+
+ len -= blocks * SHA1_BLOCK_SIZE;
+ state->count += blocks * SHA1_BLOCK_SIZE;
+
+ /* Process the input data in blocks, without applying padding */
+ asm volatile(".byte 0xf3,0x0f,0xa6,0xc8"
+ : "+S"(src), "+D"(dst)
+ : "a"((long)-1), "c"((unsigned long)blocks));
+
+ memcpy((u8 *)(state->state), dst, SHA1_DIGEST_SIZE);
+
+ return len;
+}
+
static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *src,
unsigned int len)
{
@@ -316,6 +411,44 @@ static struct shash_alg sha256_alg_nano = {
}
};
+static struct shash_alg sha1_alg_zhaoxin = {
+ .digestsize = SHA1_DIGEST_SIZE,
+ .init = padlock_sha1_init,
+ .update = padlock_sha1_update_zhaoxin,
+ .finup = padlock_sha1_finup_zhaoxin,
+ .export = padlock_sha_export,
+ .import = padlock_sha_import,
+ .descsize = PADLOCK_SHA_DESCSIZE,
+ .statesize = SHA1_STATE_SIZE,
+ .base = {
+ .cra_name = "sha1",
+ .cra_driver_name = "sha1-padlock-zhaoxin",
+ .cra_priority = PADLOCK_CRA_PRIORITY,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | CRYPTO_AHASH_ALG_FINUP_MAX,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static struct shash_alg sha256_alg_zhaoxin = {
+ .digestsize = SHA256_DIGEST_SIZE,
+ .init = padlock_sha256_init,
+ .update = padlock_sha256_update_nano,
+ .finup = padlock_sha256_finup_zhaoxin,
+ .export = padlock_sha_export,
+ .import = padlock_sha_import,
+ .descsize = PADLOCK_SHA_DESCSIZE,
+ .statesize = sizeof(struct crypto_sha256_state),
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name = "sha256-padlock-zhaoxin",
+ .cra_priority = PADLOCK_CRA_PRIORITY,
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY | CRYPTO_AHASH_ALG_FINUP_MAX,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
static const struct x86_cpu_id padlock_sha_ids[] = {
X86_MATCH_FEATURE(X86_FEATURE_PHE, NULL),
{}
@@ -332,14 +465,21 @@ static int __init padlock_init(void)
if (!x86_match_cpu(padlock_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN))
return -ENODEV;
- /* Register the newly added algorithm module if on *
- * VIA Nano processor, or else just do as before */
- if (c->x86_model < 0x0f) {
- sha1 = &sha1_alg;
- sha256 = &sha256_alg;
+ if (c->x86 >= 0x07) {
+ /* Register the newly added algorithm module for Zhaoxin processors */
+ sha1 = &sha1_alg_zhaoxin;
+ sha256 = &sha256_alg_zhaoxin;
} else {
- sha1 = &sha1_alg_nano;
- sha256 = &sha256_alg_nano;
+ /* Register the newly added algorithm module if on
+ * VIA Nano processor, or else just do as before
+ */
+ if (c->x86_model < 0x0f) {
+ sha1 = &sha1_alg;
+ sha256 = &sha256_alg;
+ } else {
+ sha1 = &sha1_alg_nano;
+ sha256 = &sha256_alg_nano;
+ }
}
rc = crypto_register_shash(sha1);
@@ -366,12 +506,17 @@ static void __exit padlock_fini(void)
{
struct cpuinfo_x86 *c = &cpu_data(0);
- if (c->x86_model >= 0x0f) {
- crypto_unregister_shash(&sha1_alg_nano);
- crypto_unregister_shash(&sha256_alg_nano);
+ if (c->x86 >= 0x07) {
+ crypto_unregister_shash(&sha1_alg_zhaoxin);
+ crypto_unregister_shash(&sha256_alg_zhaoxin);
} else {
- crypto_unregister_shash(&sha1_alg);
- crypto_unregister_shash(&sha256_alg);
+ if (c->x86_model >= 0x0f) {
+ crypto_unregister_shash(&sha1_alg_nano);
+ crypto_unregister_shash(&sha256_alg_nano);
+ } else {
+ crypto_unregister_shash(&sha1_alg);
+ crypto_unregister_shash(&sha256_alg);
+ }
}
}
--
2.34.1