Message-Id: <67ab24d26837fcc85a59eca9c68db9bf27a878dd.1611128021.git.christophe.leroy@csgroup.eu>
Date: Wed, 20 Jan 2021 07:34:00 +0000 (UTC)
From: Christophe Leroy <christophe.leroy@...roup.eu>
To: Benjamin Herrenschmidt <benh@...nel.crashing.org>,
Paul Mackerras <paulus@...ba.org>,
Michael Ellerman <mpe@...erman.id.au>
Cc: linux-kernel@...r.kernel.org, linuxppc-dev@...ts.ozlabs.org
Subject: [PATCH 2/2] powerpc/32s: Unroll kuep_lock and kuep_unlock macros
Unroll the loops in kuep_lock and kuep_unlock.
Benchmarked on an mpc 8321 with a standard kernel having a
3G/1G user/kernel memory split, i.e. 12 segments for userspace.
Without KUEP, null_syscall benchmark is 220 cycles.
With KUEP, null_syscall benchmark is 439 cycles.
Once loops are unrolled, null_syscall benchmark is 366 cycles.
This is almost a 17% reduction (73 of 439 cycles).
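
For context, a minimal C sketch of how a null_syscall-style figure can
be measured from userspace. This is an illustration only, not the
actual benchmark used above: it times a trivial getppid() round trip
with the timebase register, so it reports timebase ticks rather than
core cycles, and the iteration count is arbitrary.

#include <stdio.h>
#include <unistd.h>

/* Read the PowerPC timebase (ticks, not core cycles). */
static inline unsigned long mftb(void)
{
        unsigned long tb;

        asm volatile("mftb %0" : "=r" (tb));
        return tb;
}

int main(void)
{
        unsigned long start, end, i;
        const unsigned long iters = 1000000;

        start = mftb();
        for (i = 0; i < iters; i++)
                getppid();      /* trivial syscall, round trip only */
        end = mftb();

        printf("%lu timebase ticks per syscall\n", (end - start) / iters);
        return 0;
}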
It is assumed that userspace covers at least 4 segments and
at most 14 segments.
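
To make that arithmetic explicit, an illustrative C fragment (the macro
names below are made up for the example, not the kernel's own
definitions): each segment register covers 256M, so the user segment
count is simply the user address space size shifted right by 28.

/* Illustration only: how the user segment count falls out of the split. */
#define SEGMENT_SHIFT   28                              /* each SR covers 256M */
#define USER_SPACE_SIZE 0xc0000000UL                    /* assumed 3G/1G split */
#define N_USER_SEGMENTS (USER_SPACE_SIZE >> SEGMENT_SHIFT)      /* = 12 */

With a 3G/1G split this gives 12, within the [4, 14] range assumed above.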
The isync is removed; that saves 8 cycles. For kuep_unlock, the rfi
will do the synchronisation. For kuep_lock, we get a small window
during which exec is still possible, but it won't last more than a
few instructions.
Both macros are called twice, so the size increase is in
the noise (approx. 120 instructions).
Signed-off-by: Christophe Leroy <christophe.leroy@...roup.eu>
---
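Note for reviewers (not part of the commit message): a C-level sketch,
illustration only, of the stepping the unrolled kuep_update_sr applies
when moving from one segment register to the next. The VSID field
advances by 0x111 per segment and the mask drops any carry out of the
VSID field, while the upper bits carrying Nx as set or cleared by the
caller are preserved. kuep_increment simply takes two such steps at
once, hence the 0x222 constant.

/* Illustration of one kuep_update_sr step, in C. */
static inline unsigned long next_sr(unsigned long sr)
{
        sr += 0x111;            /* next VSID */
        sr &= 0xf0ffffff;       /* clear VSID overflow, keep Nx and friends */
        return sr;
}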
arch/powerpc/include/asm/book3s/32/kup.h | 67 ++++++++++++++++++------
1 file changed, 52 insertions(+), 15 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index a0117a9d5b06..e800b515ac02 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -7,21 +7,61 @@
#ifdef __ASSEMBLY__
-.macro kuep_update_sr gpr1, gpr2 /* NEVER use r0 as gpr2 due to addis */
-101: mtsrin \gpr1, \gpr2
- addi \gpr1, \gpr1, 0x111 /* next VSID */
- rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */
- addis \gpr2, \gpr2, 0x1000 /* address of next segment */
- bdnz 101b
- isync
+.macro kuep_increment gpr1, gpr2
+ addi \gpr1, \gpr1, 0x222 /* Next second VSID */
+ addi \gpr2, \gpr2, 0x222 /* Next second VSID */
+ rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* Clear VSID overflow */
+ rlwinm \gpr2, \gpr2, 0, 0xf0ffffff /* Clear VSID overflow */
+.endm
+
+.macro kuep_update_sr gpr1, gpr2 /* NEVER use r0 as gpr1 or gpr2 due to addi */
+ addi \gpr2, \gpr1, 0x111 /* Next VSID */
+ rlwinm \gpr2, \gpr2, 0, 0xf0ffffff /* Clear VSID overflow */
+ mtsr 0, \gpr1
+ mtsr 1, \gpr2
+ kuep_increment \gpr1, \gpr2
+ mtsr 2, \gpr1
+ mtsr 3, \gpr2
+#if NUM_USER_SEGMENTS > 4
+ kuep_increment \gpr1, \gpr2
+ mtsr 4, \gpr1
+#if NUM_USER_SEGMENTS > 5
+ mtsr 5, \gpr2
+#if NUM_USER_SEGMENTS > 6
+ kuep_increment \gpr1, \gpr2
+ mtsr 6, \gpr1
+#if NUM_USER_SEGMENTS > 7
+ mtsr 7, \gpr2
+#if NUM_USER_SEGMENTS > 8
+ kuep_increment \gpr1, \gpr2
+ mtsr 8, \gpr1
+#if NUM_USER_SEGMENTS > 9
+ mtsr 9, \gpr2
+#if NUM_USER_SEGMENTS > 10
+ kuep_increment \gpr1, \gpr2
+ mtsr 10, \gpr1
+#if NUM_USER_SEGMENTS > 11
+ mtsr 11, \gpr2
+#if NUM_USER_SEGMENTS > 12
+ kuep_increment \gpr1, \gpr2
+ mtsr 12, \gpr1
+#if NUM_USER_SEGMENTS > 13
+ mtsr 13, \gpr2
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
.endm
.macro kuep_lock gpr1, gpr2
#ifdef CONFIG_PPC_KUEP
- li \gpr1, NUM_USER_SEGMENTS
- li \gpr2, 0
- mtctr \gpr1
- mfsrin \gpr1, \gpr2
+ mfsr \gpr1, 0
oris \gpr1, \gpr1, SR_NX@h /* set Nx */
kuep_update_sr \gpr1, \gpr2
#endif
@@ -29,10 +69,7 @@
.macro kuep_unlock gpr1, gpr2
#ifdef CONFIG_PPC_KUEP
- li \gpr1, NUM_USER_SEGMENTS
- li \gpr2, 0
- mtctr \gpr1
- mfsrin \gpr1, \gpr2
+ mfsr \gpr1, 0
rlwinm \gpr1, \gpr1, 0, ~SR_NX /* Clear Nx */
kuep_update_sr \gpr1, \gpr2
#endif
--
2.25.0