Message-ID: <20251017183426.GA315411@google.com>
Date: Fri, 17 Oct 2025 18:34:26 +0000
From: Eric Biggers <ebiggers@...nel.org>
To: linux-crypto@...r.kernel.org
Cc: linux-kernel@...r.kernel.org, x86@...nel.org,
Ard Biesheuvel <ardb@...nel.org>,
"Jason A . Donenfeld" <Jason@...c4.com>
Subject: Re: [PATCH 1/8] crypto: x86/aes-gcm - add VAES+AVX2 optimized code
On Wed, Oct 01, 2025 at 07:31:10PM -0700, Eric Biggers wrote:
> Add an implementation of AES-GCM that uses 256-bit vectors and the
> following CPU features: Vector AES (VAES), Vector Carryless
> Multiplication (VPCLMULQDQ), and AVX2.
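As a quick illustration of the above (a minimal sketch, not taken from the patch; the registers here are chosen arbitrarily): with VAES, VPCLMULQDQ, and AVX2, each instruction on a ymm register operates on two 128-bit lanes at once, e.g.:

	vbroadcasti128	(%rax), %ymm0		// AVX2: broadcast one 128-bit round key to both lanes
	vaesenc		%ymm0, %ymm1, %ymm1	// VAES: one AES round on two independent blocks
	vpclmulqdq	$0x00, %ymm2, %ymm3, %ymm4	// VPCLMULQDQ: two 64x64 carryless multiplies
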
A few non-functional cleanups I applied after reading over the assembly
file again (it wasn't worth resending the whole patchset):
diff --git a/arch/x86/crypto/aes-gcm-vaes-avx2.S b/arch/x86/crypto/aes-gcm-vaes-avx2.S
index e628dbb33c0e..f58096a37342 100644
--- a/arch/x86/crypto/aes-gcm-vaes-avx2.S
+++ b/arch/x86/crypto/aes-gcm-vaes-avx2.S
@@ -231,11 +231,10 @@ SYM_FUNC_START(aes_gcm_precompute_vaes_avx2)
.set TMP2, %ymm2
.set TMP2_XMM, %xmm2
.set H_CUR, %ymm3
.set H_CUR_XMM, %xmm3
.set H_CUR2, %ymm4
- .set H_CUR2_XMM, %xmm4
.set H_INC, %ymm5
.set H_INC_XMM, %xmm5
.set GFPOLY, %ymm6
.set GFPOLY_XMM, %xmm6
@@ -576,11 +575,10 @@ SYM_FUNC_START(aes_gcm_aad_update_vaes_avx2)
jz .Laad_done
cmp $16, AADLEN
jle .Laad_lastblock
-.Laad_last2blocks:
// Update GHASH with the remaining 17 <= AADLEN <= 31 bytes of AAD.
mov AADLEN, AADLEN // Zero-extend AADLEN to AADLEN64.
vmovdqu (AAD), TMP0_XMM
vmovdqu -16(AAD, AADLEN64), TMP1_XMM
vpshufb BSWAP_MASK_XMM, TMP0_XMM, TMP0_XMM
@@ -632,11 +630,11 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
vpxor RNDKEY0, AESDATA\i, AESDATA\i
.endr
.endm
// Generate and encrypt counter blocks in the given AESDATA vectors, excluding
-// the last AES round. Clobbers TMP0.
+// the last AES round. Clobbers %rax and TMP0.
.macro _aesenc_loop vecs:vararg
_ctr_begin \vecs
lea 16(KEY), %rax
.Laesenc_loop\@:
vbroadcasti128 (%rax), TMP0
@@ -687,11 +685,11 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
.set KEY, %rdi
.set LE_CTR_PTR, %rsi
.set LE_CTR_PTR32, %esi
.set GHASH_ACC_PTR, %rdx
.set SRC, %rcx // Assumed to be %rcx.
- // See .Ltail_xor_and_ghash_partial_vec
+ // See .Ltail_xor_and_ghash_1to16bytes
.set DST, %r8
.set DATALEN, %r9d
.set DATALEN64, %r9 // Zero-extend DATALEN before using!
// Additional local variables
@@ -734,11 +732,10 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
// H_POW[2-1]_XORED contain cached values from KEY->h_powers_xored. The
// descending numbering reflects the order of the key powers.
.set H_POW2_XORED, %ymm7
.set H_POW2_XORED_XMM, %xmm7
.set H_POW1_XORED, %ymm8
- .set H_POW1_XORED_XMM, %xmm8
// RNDKEY0 caches the zero-th round key, and RNDKEYLAST the last one.
.set RNDKEY0, %ymm9
.set RNDKEYLAST, %ymm10
@@ -749,13 +746,11 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
.set AESDATA0, %ymm12
.set AESDATA0_XMM, %xmm12
.set AESDATA1, %ymm13
.set AESDATA1_XMM, %xmm13
.set AESDATA2, %ymm14
- .set AESDATA2_XMM, %xmm14
.set AESDATA3, %ymm15
- .set AESDATA3_XMM, %xmm15
.if \enc
.set GHASHDATA_PTR, DST
.else
.set GHASHDATA_PTR, SRC