[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <77655d9e-fc05-4300-8f0d-7b2ad840d091@csgroup.eu>
Date: Thu, 19 Sep 2024 09:08:25 +0200
From: Christophe Leroy <christophe.leroy@...roup.eu>
To: Xi Ruoyao <xry111@...111.site>, "Jason A . Donenfeld" <Jason@...c4.com>,
Huacai Chen <chenhuacai@...nel.org>, WANG Xuerui <kernel@...0n.name>
Cc: linux-crypto@...r.kernel.org, loongarch@...ts.linux.dev,
linux-kernel@...r.kernel.org, Jinyang He <hejinyang@...ngson.cn>,
Tiezhu Yang <yangtiezhu@...ngson.cn>, Arnd Bergmann <arnd@...db.de>
Subject: Re: [PATCH v6 2/3] LoongArch: vDSO: Wire up getrandom() vDSO
implementation
Hi Xi,
Le 01/09/2024 à 08:13, Xi Ruoyao a écrit :
> Hook up the generic vDSO implementation to the LoongArch vDSO data page
> by providing the required __arch_chacha20_blocks_nostack,
> __arch_get_k_vdso_rng_data, and getrandom_syscall implementations.
>
> Signed-off-by: Xi Ruoyao <xry111@...111.site>
> ---
...
> diff --git a/arch/loongarch/vdso/vgetrandom-chacha.S b/arch/loongarch/vdso/vgetrandom-chacha.S
> new file mode 100644
> index 000000000000..7e86a50f6e85
> --- /dev/null
> +++ b/arch/loongarch/vdso/vgetrandom-chacha.S
> @@ -0,0 +1,242 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2024 Xi Ruoyao <xry111@...111.site>. All Rights Reserved.
> + */
> +
> +#include <asm/asm.h>
> +#include <asm/regdef.h>
> +#include <linux/linkage.h>
> +
> +.text
> +
> +/* Salsa20 quarter-round */
> +.macro QR a b c d
> + add.w \a, \a, \b
> + xor \d, \d, \a
> + rotri.w \d, \d, 16
> +
> + add.w \c, \c, \d
> + xor \b, \b, \c
> + rotri.w \b, \b, 20
> +
> + add.w \a, \a, \b
> + xor \d, \d, \a
> + rotri.w \d, \d, 24
> +
> + add.w \c, \c, \d
> + xor \b, \b, \c
> + rotri.w \b, \b, 25
> +.endm
> +
I know nothing about Loongarch assembly and execution performance, but I
see that GCC groups operations by 4 when building
reference_chacha20_blocks() from vdso_test_chacha, see below.
Shouldn't you do the same and group ROUNDs by 4 just like I did on
powerpc ?
(https://github.com/torvalds/linux/blob/master/arch/powerpc/kernel/vdso/vgetrandom-chacha.S)
0000000000000134 <.L3>:
134: 001061d8 add.w $s1, $t2, $s1
138: 0015c312 xor $t6, $s1, $t4
13c: 26000070 ldptr.d $t4, $sp, 0
140: 001036d6 add.w $fp, $fp, $t1
144: 001065f9 add.w $s2, $t3, $s2
148: 0010335a add.w $s3, $s3, $t0
14c: 00159ad3 xor $t7, $fp, $a2
150: 0015c344 xor $a0, $s3, $t4
154: 0015c731 xor $t5, $s2, $t5
158: 004cc273 rotri.w $t7, $t7, 0x10
15c: 004cc252 rotri.w $t6, $t6, 0x10
160: 004cc231 rotri.w $t5, $t5, 0x10
164: 004cc084 rotri.w $a0, $a0, 0x10
168: 00104766 add.w $a2, $s4, $t5
16c: 00102088 add.w $a4, $a0, $a4
170: 00102669 add.w $a5, $t7, $a5
174: 001048e7 add.w $a3, $a3, $t6
178: 0015b530 xor $t4, $a5, $t1
17c: 0015b10c xor $t0, $a4, $t0
180: 0015b8ee xor $t2, $a3, $t2
184: 0015bccf xor $t3, $a2, $t3
188: 004cd18d rotri.w $t1, $t0, 0x14
18c: 004cd210 rotri.w $t4, $t4, 0x14
190: 004cd1ce rotri.w $t2, $t2, 0x14
194: 004cd1ef rotri.w $t3, $t3, 0x14
198: 001042d6 add.w $fp, $fp, $t4
19c: 00103b18 add.w $s1, $s1, $t2
1a0: 00103f39 add.w $s2, $s2, $t3
1a4: 0010375a add.w $s3, $s3, $t1
1a8: 0015ced3 xor $t7, $fp, $t7
1ac: 0015cb12 xor $t6, $s1, $t6
1b0: 0015c731 xor $t5, $s2, $t5
1b4: 00159344 xor $a0, $s3, $a0
1b8: 004ce274 rotri.w $t8, $t7, 0x18
1bc: 004ce084 rotri.w $a0, $a0, 0x18
1c0: 004ce253 rotri.w $t7, $t6, 0x18
1c4: 004ce232 rotri.w $t6, $t5, 0x18
1c8: 00105129 add.w $a5, $a5, $t8
1cc: 00101111 add.w $t5, $a4, $a0
1d0: 00104ce7 add.w $a3, $a3, $t7
1d4: 001048c6 add.w $a2, $a2, $t6
1d8: 0015c130 xor $t4, $a5, $t4
1dc: 0015b8ee xor $t2, $a3, $t2
1e0: 0015bccf xor $t3, $a2, $t3
1e4: 0015b62d xor $t1, $t5, $t1
1e8: 004ce610 rotri.w $t4, $t4, 0x19
1ec: 004ce5ce rotri.w $t2, $t2, 0x19
1f0: 004ce5ef rotri.w $t3, $t3, 0x19
1f4: 004ce5ad rotri.w $t1, $t1, 0x19
1f8: 00103ad6 add.w $fp, $fp, $t2
1fc: 00103f18 add.w $s1, $s1, $t3
200: 00103739 add.w $s2, $s2, $t1
204: 0010435a add.w $s3, $s3, $t4
208: 001592c4 xor $a0, $fp, $a0
20c: 0015d314 xor $t8, $s1, $t8
210: 0015cf33 xor $t7, $s2, $t7
214: 0015cb52 xor $t6, $s3, $t6
218: 004cc084 rotri.w $a0, $a0, 0x10
21c: 004cc294 rotri.w $t8, $t8, 0x10
220: 004cc273 rotri.w $t7, $t7, 0x10
224: 004cc252 rotri.w $t6, $t6, 0x10
228: 001010dc add.w $s5, $a2, $a0
22c: 0010523d add.w $s6, $t5, $t8
230: 00104d3e add.w $s7, $a5, $t7
234: 001048ff add.w $s8, $a3, $t6
238: 0015c3ec xor $t0, $s8, $t4
23c: 0015bb8e xor $t2, $s5, $t2
240: 0015bfaf xor $t3, $s6, $t3
244: 0015b7cd xor $t1, $s7, $t1
248: 004cd1ad rotri.w $t1, $t1, 0x14
24c: 004cd18c rotri.w $t0, $t0, 0x14
250: 004cd1ce rotri.w $t2, $t2, 0x14
254: 004cd1ef rotri.w $t3, $t3, 0x14
258: 00103ad7 add.w $s0, $fp, $t2
25c: 00103f0a add.w $a6, $s1, $t3
260: 0010372b add.w $a7, $s2, $t1
264: 00103341 add.w $ra, $s3, $t0
268: 001592e4 xor $a0, $s0, $a0
26c: 0015d154 xor $t8, $a6, $t8
270: 0015cd73 xor $t7, $a7, $t7
274: 0015c832 xor $t6, $ra, $t6
278: 004ce084 rotri.w $a0, $a0, 0x18
27c: 004ce294 rotri.w $t8, $t8, 0x18
280: 004ce273 rotri.w $t7, $t7, 0x18
284: 004ce252 rotri.w $t6, $t6, 0x18
288: 0010139c add.w $s5, $s5, $a0
28c: 001053bd add.w $s6, $s6, $t8
290: 00104fde add.w $s7, $s7, $t7
294: 00104bff add.w $s8, $s8, $t6
298: 0015b7d1 xor $t5, $s7, $t1
29c: 0015bb8e xor $t2, $s5, $t2
2a0: 0015b3ed xor $t1, $s8, $t0
2a4: 0015bfaf xor $t3, $s6, $t3
2a8: 0040808c slli.w $t0, $a0, 0x0
2ac: 004ce631 rotri.w $t5, $t5, 0x19
2b0: 004ce5ce rotri.w $t2, $t2, 0x19
2b4: 004ce5ef rotri.w $t3, $t3, 0x19
2b8: 004ce5ad rotri.w $t1, $t1, 0x19
2bc: 2700006c stptr.d $t0, $sp, 0
2c0: 02bffca5 addi.w $a1, $a1, -1(0xfff)
2c4: 0040822c slli.w $t0, $t5, 0x0
2c8: 004082f6 slli.w $fp, $s0, 0x0
2cc: 0040839b slli.w $s4, $s5, 0x0
2d0: 004081ce slli.w $t2, $t2, 0x0
2d4: 00408158 slli.w $s1, $a6, 0x0
2d8: 00408286 slli.w $a2, $t8, 0x0
2dc: 004083a8 slli.w $a4, $s6, 0x0
2e0: 004081ef slli.w $t3, $t3, 0x0
2e4: 00408179 slli.w $s2, $a7, 0x0
2e8: 00408270 slli.w $t4, $t7, 0x0
2ec: 004083c9 slli.w $a5, $s7, 0x0
2f0: 0040803a slli.w $s3, $ra, 0x0
2f4: 00408251 slli.w $t5, $t6, 0x0
2f8: 004083e7 slli.w $a3, $s8, 0x0
2fc: 004081ad slli.w $t1, $t1, 0x0
300: 47fe34bf bnez $a1, -460(0x7ffe34) # 134 <.L3>
Christophe
Powered by blists - more mailing lists