[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20211013152243.2216899-6-ardb@kernel.org>
Date: Wed, 13 Oct 2021 17:22:39 +0200
From: Ard Biesheuvel <ardb@...nel.org>
To: linux-arm-kernel@...ts.infradead.org
Cc: linux-hardening@...r.kernel.org, mark.rutland@....com,
catalin.marinas@....com, will@...nel.org,
Ard Biesheuvel <ardb@...nel.org>
Subject: [RFC PATCH 5/9] arm64: chacha-neon: move frame pop forward
Instead of branching back to the common exit point of the routine to pop
the stack frame and return to the caller, move the frame pop to right
after the point where we last use the callee-saved registers, and let
each exit path return directly with its own 'ret'. This simplifies the
generation of CFI unwind metadata, and reduces the number of branches
needed.
Signed-off-by: Ard Biesheuvel <ardb@...nel.org>
---
arch/arm64/crypto/chacha-neon-core.S | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/crypto/chacha-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S
index b70ac76f2610..918c0beae019 100644
--- a/arch/arm64/crypto/chacha-neon-core.S
+++ b/arch/arm64/crypto/chacha-neon-core.S
@@ -691,6 +691,8 @@ CPU_BE( rev a15, a15 )
zip2 v15.2d, v29.2d, v31.2d
stp a14, a15, [x1, #-8]
+ frame_pop
+
tbnz x5, #63, .Lt128
ld1 {v28.16b-v31.16b}, [x2]
@@ -726,7 +728,6 @@ CPU_BE( rev a15, a15 )
st1 {v24.16b-v27.16b}, [x1], #64
st1 {v28.16b-v31.16b}, [x1]
-.Lout: frame_pop
ret
// fewer than 192 bytes of in/output
@@ -744,7 +745,7 @@ CPU_BE( rev a15, a15 )
eor v23.16b, v23.16b, v31.16b
st1 {v20.16b-v23.16b}, [x5] // overlapping stores
1: st1 {v16.16b-v19.16b}, [x1]
- b .Lout
+ ret
// fewer than 128 bytes of in/output
.Lt128: ld1 {v28.16b-v31.16b}, [x10]
@@ -772,7 +773,7 @@ CPU_BE( rev a15, a15 )
eor v31.16b, v31.16b, v3.16b
st1 {v28.16b-v31.16b}, [x6] // overlapping stores
2: st1 {v20.16b-v23.16b}, [x1]
- b .Lout
+ ret
// fewer than 320 bytes of in/output
.Lt320: cbz x7, 3f // exactly 256 bytes?
@@ -789,7 +790,7 @@ CPU_BE( rev a15, a15 )
eor v31.16b, v31.16b, v3.16b
st1 {v28.16b-v31.16b}, [x7] // overlapping stores
3: st1 {v24.16b-v27.16b}, [x1]
- b .Lout
+ ret
SYM_FUNC_END(chacha_4block_xor_neon)
.section ".rodata", "a", %progbits
--
2.30.2
Powered by blists - more mailing lists