linux-kernel - [PATCH 3/5] lib/find_bit: unify _find_first_{,and,zero}

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220728161208.865420-4-yury.norov@gmail.com>
Date:   Thu, 28 Jul 2022 09:12:06 -0700
From:   Yury Norov <yury.norov@...il.com>
To:     Linus Torvalds <torvalds@...ux-foundation.org>,
        Guenter Roeck <linux@...ck-us.net>,
        Dennis Zhou <dennis@...nel.org>,
        Russell King <linux@...linux.org.uk>,
        Catalin Marinas <catalin.marinas@....com>,
        Andy Shevchenko <andriy.shevchenko@...ux.intel.com>,
        Rasmus Villemoes <linux@...musvillemoes.dk>,
        Alexey Klimov <aklimov@...hat.com>,
        Linux Kernel Mailing List <linux-kernel@...r.kernel.org>
Cc:     Yury Norov <yury.norov@...il.com>
Subject: [PATCH 3/5] lib/find_bit: unify _find_first_{,and,zero}_bit implementations

The functions are almost identical, so create a common helper for them so
that the compiler will be able to either inline the helper and optimize out
parameters known at compile time, or save some space by keeping it as a
real function.

On kvm/x86_64, bloat-o-meter reports +9 bytes. Running find_bit_benchmark 5
times before and after doesn't show a significant difference (i.e. a delta
greater than 3 sigma), except for find_next_bit, which is most likely an
outlier (although a lucky one for the patch):
                     v5.19-rc8    Optimized   Difference (more - better)
Random dense bitmap	    ns	         ns             %       sigmas
find_next_bit:          721209       594936            18         3.19
find_next_zero_bit:     738138       638182            14         1.40
find_last_bit:          802393       940846           -17        -0.31
find_first_bit:        3560900      3379983             5         0.65
find_first_and_bit:   38601442     37683449             2         1.00
find_next_and_bit:      335574       300373            10         2.82
                                                       
Random sparse bitmap                                   
find_next_bit:           15868        13856            13         0.82
find_next_zero_bit:    1311843      1227418             6         0.72
find_last_bit:           13633        14080            -3        -0.74
find_first_bit:        1273625      1253343             2         0.52
find_first_and_bit:       8548         8157             5         0.32
find_next_and_bit:        8828         8437             4         0.52

Signed-off-by: Yury Norov <yury.norov@...il.com>
---
 lib/find_bit.c | 62 +++++++++++++++++++++++++++-----------------------
 1 file changed, 33 insertions(+), 29 deletions(-)

diff --git a/lib/find_bit.c b/lib/find_bit.c
index 4ef3151b3109..d207d1699834 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -20,12 +20,38 @@
 #include <linux/swab.h>
 
 /*
- * This is a common helper function for find_next_bit, find_next_zero_bit, and
- * find_next_and_bit. The differences are:
+ * These are common helper functions for find_{first,next}_bit{,_le}.
+ * Internal parameters are:
  *  - The "invert" argument, which is XORed with each fetched word before
- *    searching it for one bits.
- *  - The optional "addr2", which is anded with "addr1" if present.
+ *    searching it for set bits; to implement find_*_zero_bit().
+ *  - The optional "addr2", which is ANDed with "addr1" if present; to
+ *    implement find_*_and_bit().
+ *  - The "need_swab" that converts words to BE format; to implement
+ *    find_*_le() on big-endian machines.
  */
+static inline
+unsigned long __find_first_bit(const unsigned long *addr1, const unsigned long *addr2,
+				unsigned long size, unsigned long invert, bool need_swab)
+{
+	unsigned long idx, val;
+
+	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+		val = addr1[idx];
+		if (addr2)
+			val &= addr2[idx];
+
+		val ^= invert;
+
+		if (val) {
+			if (need_swab)
+				val = swab(val);
+			return min(idx * BITS_PER_LONG + __ffs(val), size);
+		}
+	}
+
+	return size;
+}
+
 static inline unsigned long __find_next_bit(const unsigned long *addr1,
 		const unsigned long *addr2, unsigned long nbits,
 		unsigned long start, unsigned long invert, bool need_swab)
@@ -118,14 +144,7 @@ EXPORT_SYMBOL(_find_next_bit_le);
  */
 unsigned long _find_first_bit(const unsigned long *addr, unsigned long size)
 {
-	unsigned long idx;
-
-	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
-		if (addr[idx])
-			return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
-	}
-
-	return size;
+	return __find_first_bit(addr, NULL, size, 0UL, false);
 }
 EXPORT_SYMBOL(_find_first_bit);
 #endif
@@ -138,15 +157,7 @@ unsigned long _find_first_and_bit(const unsigned long *addr1,
 				  const unsigned long *addr2,
 				  unsigned long size)
 {
-	unsigned long idx, val;
-
-	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
-		val = addr1[idx] & addr2[idx];
-		if (val)
-			return min(idx * BITS_PER_LONG + __ffs(val), size);
-	}
-
-	return size;
+	return __find_first_bit(addr1, addr2, size, 0UL, false);
 }
 EXPORT_SYMBOL(_find_first_and_bit);
 #endif
@@ -157,14 +168,7 @@ EXPORT_SYMBOL(_find_first_and_bit);
  */
 unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size)
 {
-	unsigned long idx;
-
-	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
-		if (addr[idx] != ~0UL)
-			return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
-	}
-
-	return size;
+	return __find_first_bit(addr, NULL, size, ~0UL, false);
 }
 EXPORT_SYMBOL(_find_first_zero_bit);
 #endif
-- 
2.34.1