[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <E1ooSWe-000FEG-G7@rmk-PC.armlinux.org.uk>
Date: Fri, 28 Oct 2022 17:48:08 +0100
From: "Russell King (Oracle)" <rmk+kernel@...linux.org.uk>
To: Yury Norov <yury.norov@...il.com>
Cc: Catalin Marinas <catalin.marinas@....com>,
Mark Rutland <mark.rutland@....com>,
Will Deacon <will@...nel.org>,
Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
Linus Torvalds <torvalds@...ux-foundation.org>,
linux-arm-kernel@...ts.infradead.org
Subject: [PATCH 4/5] ARM: findbit: operate by words
Convert the implementations to operate on words rather than bytes
which makes bitmap searching faster.
Signed-off-by: Russell King (Oracle) <rmk+kernel@...linux.org.uk>
---
arch/arm/include/asm/assembler.h | 6 +++
arch/arm/lib/findbit.S | 78 ++++++++++++++++++--------------
2 files changed, 50 insertions(+), 34 deletions(-)
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 90fbe4a3f9c8..28e18f79c300 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -761,6 +761,12 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
.endif
.endm
+ .if __LINUX_ARM_ARCH__ < 6
+ .set .Lrev_l_uses_tmp, 1
+ .else
+ .set .Lrev_l_uses_tmp, 0
+ .endif
+
/*
* bl_r - branch and link to register
*
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
index 8280f66d38a5..6ec584d16d46 100644
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -14,32 +14,32 @@
#include <asm/assembler.h>
.text
+#ifdef __ARMEB__
+#define SWAB_ENDIAN le
+#else
+#define SWAB_ENDIAN be
+#endif
+
.macro find_first, endian, set, name
ENTRY(_find_first_\name\()bit_\endian)
teq r1, #0
beq 3f
mov r2, #0
-1:
- .ifc \endian, be
- eor r3, r2, #0x18
- ARM( ldrb r3, [r0, r3, lsr #3] )
- THUMB( lsr r3, #3 )
- THUMB( ldrb r3, [r0, r3] )
+1: ldr r3, [r0], #4
+ .ifeq \set
+ mvns r3, r3 @ invert/test bits
.else
- ARM( ldrb r3, [r0, r2, lsr #3] )
- THUMB( lsr r3, r2, #3 )
- THUMB( ldrb r3, [r0, r3] )
+ movs r3, r3 @ test bits
.endif
- .ifeq \set
- eors r3, r3, #0xff @ invert bits
+ .ifc \endian, SWAB_ENDIAN
+ bne .L_found_swab
.else
- movs r3, r3
+ bne .L_found @ found the bit?
.endif
- bne .L_found @ any now set - found zero bit
- add r2, r2, #8 @ next bit pointer
+ add r2, r2, #32 @ next index
2: cmp r2, r1 @ any more?
blo 1b
-3: mov r0, r1 @ no free bits
+3: mov r0, r1 @ no more bits
ret lr
ENDPROC(_find_first_\name\()bit_\endian)
.endm
@@ -48,24 +48,25 @@ ENDPROC(_find_first_\name\()bit_\endian)
ENTRY(_find_next_\name\()bit_\endian)
cmp r2, r1
bhs 3b
- ands ip, r2, #7
- beq 1b @ If new byte, goto old routine
- .ifc \endian, be
- eor r3, r2, #0x18
- ARM( ldrb r3, [r0, r3, lsr #3] )
- THUMB( lsr r3, #3 )
- THUMB( ldrb r3, [r0, r3] )
- .else
- ARM( ldrb r3, [r0, r2, lsr #3] )
- THUMB( lsr r3, r2, #3 )
- THUMB( ldrb r3, [r0, r3] )
+ mov ip, r2, lsr #5 @ word index
+ add r0, r0, ip, lsl #2
+ ands ip, r2, #31 @ bit position
+ beq 1b
+ ldr r3, [r0], #4
+ .ifeq \set
+ mvn r3, r3 @ invert bits
+ .endif
+ .ifc \endian, SWAB_ENDIAN
+ rev_l r3, ip
+ .if .Lrev_l_uses_tmp
+ @ we need to recompute ip because rev_l will have overwritten
+ @ it.
+ and ip, r2, #31 @ bit position
.endif
- .ifeq \set
- eor r3, r3, #0xff @ now looking for a 1 bit
.endif
movs r3, r3, lsr ip @ shift off unused bits
bne .L_found
- orr r2, r2, #7 @ if zero, then no bits here
+ orr r2, r2, #31 @ no zero bits
add r2, r2, #1 @ align bit pointer
b 2b @ loop for next bit
ENDPROC(_find_next_\name\()bit_\endian)
@@ -95,6 +96,8 @@ ENDPROC(_find_next_\name\()bit_\endian)
/*
* One or more bits in the LSB of r3 are assumed to be set.
*/
+.L_found_swab:
+ rev_l r3, ip
.L_found:
#if __LINUX_ARM_ARCH__ >= 7
rbit r3, r3 @ reverse bits
@@ -107,13 +110,20 @@ ENDPROC(_find_next_\name\()bit_\endian)
rsb r3, r3, #31 @ offset of first set bit
add r0, r2, r3 @ add offset of first set bit
#else
- tst r3, #0x0f
+ mov ip, #~0
+ tst r3, ip, lsr #16 @ test bits 0-15
+ addeq r2, r2, #16
+ moveq r3, r3, lsr #16
+ tst r3, #0x00ff
+ addeq r2, r2, #8
+ moveq r3, r3, lsr #8
+ tst r3, #0x000f
addeq r2, r2, #4
- movne r3, r3, lsl #4
- tst r3, #0x30
+ moveq r3, r3, lsr #4
+ tst r3, #0x0003
addeq r2, r2, #2
- movne r3, r3, lsl #2
- tst r3, #0x40
+ moveq r3, r3, lsr #2
+ tst r3, #0x0001
addeq r2, r2, #1
mov r0, r2
#endif
--
2.30.2
Powered by blists - more mailing lists