Message-ID: <20251017183426.GA315411@google.com>
Date: Fri, 17 Oct 2025 18:34:26 +0000
From: Eric Biggers <ebiggers@...nel.org>
To: linux-crypto@...r.kernel.org
Cc: linux-kernel@...r.kernel.org, x86@...nel.org,
	Ard Biesheuvel <ardb@...nel.org>,
	"Jason A . Donenfeld" <Jason@...c4.com>
Subject: Re: [PATCH 1/8] crypto: x86/aes-gcm - add VAES+AVX2 optimized code

On Wed, Oct 01, 2025 at 07:31:10PM -0700, Eric Biggers wrote:
> Add an implementation of AES-GCM that uses 256-bit vectors and the
> following CPU features: Vector AES (VAES), Vector Carryless
> Multiplication (VPCLMULQDQ), and AVX2.
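
For context on the quoted description: with 256-bit vectors, each %ymm register
holds two AES blocks, so a single VAES instruction performs one AES round on
both, and a single VPCLMULQDQ does a carryless multiply in each 128-bit lane
for GHASH. A minimal sketch of the idea (register choices and the round-key
pointer in %rax are illustrative only, not taken from the actual file):

	// Broadcast a 128-bit round key into both lanes (AVX2), then run one
	// AES round on the two blocks held in %ymm1 (VAES).
	vbroadcasti128	(%rax), %ymm0
	vaesenc		%ymm0, %ymm1, %ymm1

	// One GHASH multiplication step on both lanes: carryless-multiply the
	// low 64-bit halves of each 128-bit lane (VPCLMULQDQ).
	vpclmulqdq	$0x00, %ymm2, %ymm1, %ymm3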

A few non-functional cleanups I applied after reading over the assembly
file again (it wasn't worth resending the whole patchset for these):

diff --git a/arch/x86/crypto/aes-gcm-vaes-avx2.S b/arch/x86/crypto/aes-gcm-vaes-avx2.S
index e628dbb33c0e..f58096a37342 100644
--- a/arch/x86/crypto/aes-gcm-vaes-avx2.S
+++ b/arch/x86/crypto/aes-gcm-vaes-avx2.S
@@ -231,11 +231,10 @@ SYM_FUNC_START(aes_gcm_precompute_vaes_avx2)
 	.set	TMP2,		%ymm2
 	.set	TMP2_XMM,	%xmm2
 	.set	H_CUR,		%ymm3
 	.set	H_CUR_XMM,	%xmm3
 	.set	H_CUR2,		%ymm4
-	.set	H_CUR2_XMM,	%xmm4
 	.set	H_INC,		%ymm5
 	.set	H_INC_XMM,	%xmm5
 	.set	GFPOLY,		%ymm6
 	.set	GFPOLY_XMM,	%xmm6
 
@@ -576,11 +575,10 @@ SYM_FUNC_START(aes_gcm_aad_update_vaes_avx2)
 
 	jz		.Laad_done
 	cmp		$16, AADLEN
 	jle		.Laad_lastblock
 
-.Laad_last2blocks:
 	// Update GHASH with the remaining 17 <= AADLEN <= 31 bytes of AAD.
 	mov		AADLEN, AADLEN	// Zero-extend AADLEN to AADLEN64.
 	vmovdqu		(AAD), TMP0_XMM
 	vmovdqu		-16(AAD, AADLEN64), TMP1_XMM
 	vpshufb		BSWAP_MASK_XMM, TMP0_XMM, TMP0_XMM
@@ -632,11 +630,11 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
 	vpxor		RNDKEY0, AESDATA\i, AESDATA\i
 .endr
 .endm
 
 // Generate and encrypt counter blocks in the given AESDATA vectors, excluding
-// the last AES round.  Clobbers TMP0.
+// the last AES round.  Clobbers %rax and TMP0.
 .macro	_aesenc_loop	vecs:vararg
 	_ctr_begin	\vecs
 	lea		16(KEY), %rax
 .Laesenc_loop\@:
 	vbroadcasti128	(%rax), TMP0
@@ -687,11 +685,11 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
 	.set	KEY,		%rdi
 	.set	LE_CTR_PTR,	%rsi
 	.set	LE_CTR_PTR32,	%esi
 	.set	GHASH_ACC_PTR,	%rdx
 	.set	SRC,		%rcx	// Assumed to be %rcx.
-					// See .Ltail_xor_and_ghash_partial_vec
+					// See .Ltail_xor_and_ghash_1to16bytes
 	.set	DST,		%r8
 	.set	DATALEN,	%r9d
 	.set	DATALEN64,	%r9	// Zero-extend DATALEN before using!
 
 	// Additional local variables
@@ -734,11 +732,10 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
 	// H_POW[2-1]_XORED contain cached values from KEY->h_powers_xored.  The
 	// descending numbering reflects the order of the key powers.
 	.set	H_POW2_XORED,	%ymm7
 	.set	H_POW2_XORED_XMM, %xmm7
 	.set	H_POW1_XORED,	%ymm8
-	.set	H_POW1_XORED_XMM, %xmm8
 
 	// RNDKEY0 caches the zero-th round key, and RNDKEYLAST the last one.
 	.set	RNDKEY0,	%ymm9
 	.set	RNDKEYLAST,	%ymm10
 
@@ -749,13 +746,11 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
 	.set	AESDATA0,	%ymm12
 	.set	AESDATA0_XMM,	%xmm12
 	.set	AESDATA1,	%ymm13
 	.set	AESDATA1_XMM,	%xmm13
 	.set	AESDATA2,	%ymm14
-	.set	AESDATA2_XMM,	%xmm14
 	.set	AESDATA3,	%ymm15
-	.set	AESDATA3_XMM,	%xmm15
 
 .if \enc
 	.set	GHASHDATA_PTR,	DST
 .else
 	.set	GHASHDATA_PTR,	SRC
