[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220728161208.865420-3-yury.norov@gmail.com>
Date: Thu, 28 Jul 2022 09:12:05 -0700
From: Yury Norov <yury.norov@...il.com>
To: Linus Torvalds <torvalds@...ux-foundation.org>,
Guenter Roeck <linux@...ck-us.net>,
Dennis Zhou <dennis@...nel.org>,
Russell King <linux@...linux.org.uk>,
Catalin Marinas <catalin.marinas@....com>,
Andy Shevchenko <andriy.shevchenko@...ux.intel.com>,
Rasmus Villemoes <linux@...musvillemoes.dk>,
Alexey Klimov <aklimov@...hat.com>,
Linux Kernel Mailing List <linux-kernel@...r.kernel.org>
Cc: Yury Norov <yury.norov@...il.com>
Subject: [PATCH 2/5] lib/find_bit: optimize find_next_bit() functions
The function _find_next_bit() takes parameters that modify its behavior to
implement and-, zero- and le- flavors. The parameters are known at compile
time, but the current design prevents the compiler from optimizing them out.
This patch adds wrappers around _find_next_bit(), and turns it into
an internal inline helper, so that the optimization becomes possible.
I ran find_bit_benchmark 5 times on top of 5.19-rc8 and 5 times on top
of this patch. The results for kvm/x86_64 are:
v5.19-rc8 Optimized Difference (more - better)
Random dense bitmap ns ns % sigmas*
find_next_bit: 721209 692337 4 0.73
find_next_zero_bit: 738138 701094 5 0.52
find_last_bit: 802393 698133 13 0.49
find_first_bit: 3560900 3574644 0 -0.07
find_first_and_bit: 38601442 37945046 2 0.71
find_next_and_bit: 335574 306184 9 2.36
Random sparse bitmap
find_next_bit: 15868 13856 13 0.82
find_next_zero_bit: 1311843 1227418 4 0.72
find_last_bit: 13633 14080 -3 -0.74
find_first_bit: 1273625 1253343 1 0.52
find_first_and_bit: 8548 8157 7 0.32
find_next_and_bit: 8828 8437 6 0.52
* Calculated as:
(mean(before) - mean(after)) / max(std(before), std(after))
All in all, the optimized code is generally faster, but the difference
never reaches a solid 3 sigmas. find_next_and_bit almost touches the
limit in the dense bitmap test, but does not quite reach it.
However, bloat-o-meter shows a significant ~2.5K decrease in image size.
So, the optimization has an overall positive impact.
Bloat-o-meter:
add/remove: 4/2 grow/shrink: 18/193 up/down: 627/-3108 (-2481)
Suggested-by: Linus Torvalds <torvalds@...ux-foundation.org>
Signed-off-by: Yury Norov <yury.norov@...il.com>
---
include/linux/find.h | 25 ++++++++++++++--------
lib/find_bit.c | 49 ++++++++++++++++++++++++++++++++++++++++----
2 files changed, 62 insertions(+), 12 deletions(-)
diff --git a/include/linux/find.h b/include/linux/find.h
index 424ef67d4a42..3ace995d7079 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -8,9 +8,18 @@
#include <linux/bitops.h>
-extern unsigned long _find_next_bit(const unsigned long *addr1,
- const unsigned long *addr2, unsigned long nbits,
- unsigned long start, unsigned long invert, unsigned long le);
+unsigned long _find_next_bit(const unsigned long *addr1, unsigned long nbits,
+ unsigned long start);
+unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2,
+ unsigned long nbits, unsigned long start);
+unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits,
+ unsigned long start);
+#ifdef __BIG_ENDIAN
+unsigned long _find_next_zero_bit_le(const void *addr, unsigned
+ long size, unsigned long offset);
+unsigned long _find_next_bit_le(const void *addr, unsigned
+ long size, unsigned long offset);
+#endif
extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
extern unsigned long _find_first_and_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long size);
@@ -41,7 +50,7 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
return val ? __ffs(val) : size;
}
- return _find_next_bit(addr, NULL, size, offset, 0UL, 0);
+ return _find_next_bit(addr, size, offset);
}
#endif
@@ -71,7 +80,7 @@ unsigned long find_next_and_bit(const unsigned long *addr1,
return val ? __ffs(val) : size;
}
- return _find_next_bit(addr1, addr2, size, offset, 0UL, 0);
+ return _find_next_and_bit(addr1, addr2, size, offset);
}
#endif
@@ -99,7 +108,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
return val == ~0UL ? size : ffz(val);
}
- return _find_next_bit(addr, NULL, size, offset, ~0UL, 0);
+ return _find_next_zero_bit(addr, size, offset);
}
#endif
@@ -247,7 +256,7 @@ unsigned long find_next_zero_bit_le(const void *addr, unsigned
return val == ~0UL ? size : ffz(val);
}
- return _find_next_bit(addr, NULL, size, offset, ~0UL, 1);
+ return _find_next_zero_bit_le(addr, size, offset);
}
#endif
@@ -266,7 +275,7 @@ unsigned long find_next_bit_le(const void *addr, unsigned
return val ? __ffs(val) : size;
}
- return _find_next_bit(addr, NULL, size, offset, 0UL, 1);
+ return _find_next_bit_le(addr, size, offset);
}
#endif
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 04c142acfc40..4ef3151b3109 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -19,9 +19,6 @@
#include <linux/minmax.h>
#include <linux/swab.h>
-#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \
- !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) || \
- !defined(find_next_and_bit)
/*
* This is a common helper function for find_next_bit, find_next_zero_bit, and
* find_next_and_bit. The differences are:
@@ -29,7 +26,7 @@
* searching it for one bits.
* - The optional "addr2", which is anded with "addr1" if present.
*/
-unsigned long _find_next_bit(const unsigned long *addr1,
+static inline unsigned long __find_next_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long nbits,
unsigned long start, unsigned long invert, bool need_swab)
{
@@ -68,9 +65,53 @@ unsigned long _find_next_bit(const unsigned long *addr1,
return min(start + __ffs(tmp), nbits);
}
+
+#ifndef find_next_bit
+unsigned long _find_next_bit(const unsigned long *addr, unsigned long nbits, unsigned long start)
+{
+ return __find_next_bit(addr, NULL, nbits, start, 0UL, 0);
+}
EXPORT_SYMBOL(_find_next_bit);
#endif
+#ifndef find_next_and_bit
+unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2,
+ unsigned long nbits, unsigned long start)
+{
+ return __find_next_bit(addr1, addr2, nbits, start, 0UL, 0);
+}
+EXPORT_SYMBOL(_find_next_and_bit);
+#endif
+
+#ifndef find_next_zero_bit
+unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits,
+ unsigned long start)
+{
+ return __find_next_bit(addr, NULL, nbits, start, ~0UL, 0);
+}
+EXPORT_SYMBOL(_find_next_zero_bit);
+#endif
+
+#ifdef __BIG_ENDIAN
+#ifndef find_next_zero_bit_le
+unsigned long _find_next_zero_bit_le(const void *addr, unsigned
+ long size, unsigned long offset)
+{
+ return __find_next_bit(addr, NULL, size, offset, ~0UL, 1);
+}
+EXPORT_SYMBOL(_find_next_zero_bit_le);
+#endif
+
+#ifndef find_next_bit_le
+unsigned long _find_next_bit_le(const void *addr, unsigned
+ long size, unsigned long offset)
+{
+ return __find_next_bit(addr, NULL, size, offset, 0UL, 1);
+}
+EXPORT_SYMBOL(_find_next_bit_le);
+#endif
+#endif /* __BIG_ENDIAN */
+
#ifndef find_first_bit
/*
* Find the first set bit in a memory region.
--
2.34.1
Powered by blists - more mailing lists