lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20171028123012.14187-1-ynorov@caviumnetworks.com>
Date:   Sat, 28 Oct 2017 15:30:12 +0300
From:   Yury Norov <ynorov@...iumnetworks.com>
To:     linux-kernel@...r.kernel.org
Cc:     Yury Norov <ynorov@...iumnetworks.com>,
        Alexey Dobriyan <adobriyan@...il.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Clement Courbet <courbet@...gle.com>,
        Matthew Wilcox <mawilcox@...rosoft.com>,
        Rasmus Villemoes <linux@...musvillemoes.dk>
Subject: [PATCH] lib: hint GCC to inline _find_next_bit() helper

Hi all,

It seems that inlining the _find_next_bit() helper makes
find_next_bit() and find_next_zero_bit() 2 times faster in
the scenario of finding all set/cleared bits of a randomly
initialised bitmap.

For another typical scenario, traversing a sparse bitmap,
there is also a measurable improvement of about 15%.

The increase in text size of the find_bit.o module is 40 bytes
for arm64 - from 252 to 292 bytes - which looks acceptable.

This patch also contains a test module.

Measured on a ThunderX machine. Tests on other architectures are
very much appreciated.

Before:
[   96.856195] Start testing find_bit() with random-filled bitmap
[   96.868322] find_next_bit: 34529 cycles, 16304 iterations
[   96.879525] find_next_zero_bit: 35771 cycles, 16465 iterations
[   96.891409] find_last_bit: 17444 cycles, 16304 iterations
[   96.914445] find_first_bit: 1219671 cycles, 16305 iterations
[   96.925802] Start testing find_bit() with sparse bitmap
[   96.936308] find_next_bit: 301 cycles, 66 iterations
[   96.946981] find_next_zero_bit: 70897 cycles, 32703 iterations
[   96.958694] find_last_bit: 286 cycles, 66 iterations
[   96.968710] find_first_bit: 5260 cycles, 66 iterations

After:
[  169.464229] Start testing find_bit() with random-filled bitmap
[  169.476191] find_next_bit: 17520 cycles, 16336 iterations
[  169.487210] find_next_zero_bit: 17622 cycles, 16433 iterations
[  169.499111] find_last_bit: 19272 cycles, 16335 iterations
[  169.519735] find_first_bit: 978657 cycles, 16337 iterations
[  169.530912] Start testing find_bit() with sparse bitmap
[  169.541414] find_next_bit: 252 cycles, 66 iterations
[  169.551726] find_next_zero_bit: 34554 cycles, 32703 iterations
[  169.563436] find_last_bit: 294 cycles, 66 iterations
[  169.573439] find_first_bit: 3964 cycles, 66 iterations

CC: Alexey Dobriyan <adobriyan@...il.com>
CC: Andrew Morton <akpm@...ux-foundation.org>
CC: Clement Courbet <courbet@...gle.com>
CC: Matthew Wilcox <mawilcox@...rosoft.com>
CC: Rasmus Villemoes <linux@...musvillemoes.dk>
Signed-off-by: Yury Norov <ynorov@...iumnetworks.com>
---
 lib/Kconfig.debug    |   9 ++++
 lib/Makefile         |   1 +
 lib/find_bit.c       |   2 +-
 lib/test_find_bit.c  | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++
 tools/lib/find_bit.c |   2 +-
 5 files changed, 153 insertions(+), 2 deletions(-)
 create mode 100644 lib/test_find_bit.c

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index dfdad67d8f6c..138034cc68a3 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1838,6 +1838,15 @@ config TEST_BPF
 
 	  If unsure, say N.
 
+config TEST_FIND_BIT
+	tristate "Test find_bit functions"
+	default n
+	help
+	  This builds the "test_find_bit" module that measures the performance
+	  of the find_*_bit() functions.
+
+	  If unsure, say N.
+
 config TEST_FIRMWARE
 	tristate "Test firmware loading via userspace interface"
 	default n
diff --git a/lib/Makefile b/lib/Makefile
index dafa79613fb4..edb792b42c86 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -45,6 +45,7 @@ obj-y += hexdump.o
 obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o
 obj-y += kstrtox.o
 obj-$(CONFIG_TEST_BPF) += test_bpf.o
+obj-$(CONFIG_TEST_FIND_BIT) += test_find_bit.o
 obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
 obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
 obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 6ed74f78380c..9b0c89f3fd3a 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -28,7 +28,7 @@
  * find_next_zero_bit.  The difference is the "invert" argument, which
  * is XORed with each fetched word before searching it for one bits.
  */
-static unsigned long _find_next_bit(const unsigned long *addr,
+static inline unsigned long _find_next_bit(const unsigned long *addr,
 		unsigned long nbits, unsigned long start, unsigned long invert)
 {
 	unsigned long tmp;
diff --git a/lib/test_find_bit.c b/lib/test_find_bit.c
new file mode 100644
index 000000000000..8eaf10cae214
--- /dev/null
+++ b/lib/test_find_bit.c
@@ -0,0 +1,141 @@
+/*
+ * Test for find_*_bit functions.
+ *
+ * Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+/*
+ * find_bit functions are widely used in the kernel, so a successful boot
+ * is a good enough test for correctness.
+ *
+ * This test is focused on the performance of traversing bitmaps. Two typical
+ * scenarios are reproduced:
+ * - a randomly filled bitmap with an approximately equal number of set and
+ *   cleared bits;
+ * - a sparse bitmap with few set bits at random positions.
+ */
+
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/random.h>
+
+#define BITMAP_LEN	(4096UL * 8)	/* size of the test bitmap, in bits */
+#define SPARSE		(500)		/* sparse test does BITMAP_LEN / SPARSE __set_bit() calls */
+
+static DECLARE_BITMAP(bitmap, BITMAP_LEN) __initdata;	/* shared by every test below */
+
+/*
+ * Measure repeated find_first_bit() calls, clearing each bit that is found
+ * so the next call has to scan further from the start of the bitmap
+ * (Schlemiel the Painter's algorithm).
+ *
+ * Should be called after all other tests for the same data, because it
+ * destroys the bitmap contents: every set bit below @len ends up cleared.
+ *
+ * @bitmap: bitmap to scan; modified in place.
+ * @len: number of valid bits in @bitmap.
+ *
+ * Logs the get_cycles() delta and the number of find_first_bit() calls,
+ * including the final call that finds no set bit. Always returns 0.
+ */
+static int __init test_find_first_bit(void *bitmap, unsigned long len)
+{
+	unsigned long i, cnt;
+	cycles_t cycles;
+
+	cycles = get_cycles();
+	for (cnt = i = 0; i < len; cnt++) {
+		i = find_first_bit(bitmap, len);
+		/*
+		 * When no set bit is left, i is not a valid bit index;
+		 * clearing it would write past the end of the bitmap.
+		 */
+		if (i < len)
+			__clear_bit(i, bitmap);
+	}
+	cycles = get_cycles() - cycles;
+	/* cycles_t differs between architectures; widen it for printing. */
+	pr_err("find_first_bit: %llu cycles, %lu iterations\n",
+	       (unsigned long long)cycles, cnt);
+
+	return 0;
+}
+
+/*
+ * Measure a full traversal of the set bits in @bitmap using find_next_bit().
+ *
+ * @bitmap: bitmap to scan; not modified.
+ * @len: number of valid bits in @bitmap.
+ *
+ * Logs the get_cycles() delta and the number of find_next_bit() calls.
+ * Always returns 0.
+ */
+static int __init test_find_next_bit(const void *bitmap, unsigned long len)
+{
+	unsigned long i, cnt;
+	cycles_t cycles;
+
+	cycles = get_cycles();
+	/* Bound both the loop and the search by @len, not a fixed size. */
+	for (cnt = i = 0; i < len; cnt++)
+		i = find_next_bit(bitmap, len, i) + 1;
+	cycles = get_cycles() - cycles;
+	/* cycles_t differs between architectures; widen it for printing. */
+	pr_err("find_next_bit: %llu cycles, %lu iterations\n",
+	       (unsigned long long)cycles, cnt);
+
+	return 0;
+}
+
+/*
+ * Measure a full traversal of the cleared bits in @bitmap using
+ * find_next_zero_bit().
+ *
+ * @bitmap: bitmap to scan; not modified.
+ * @len: number of valid bits in @bitmap.
+ *
+ * Logs the get_cycles() delta and the number of find_next_zero_bit() calls.
+ * Always returns 0.
+ */
+static int __init test_find_next_zero_bit(const void *bitmap, unsigned long len)
+{
+	unsigned long i, cnt;
+	cycles_t cycles;
+
+	cycles = get_cycles();
+	/* The loop bound must match the @len handed to the search. */
+	for (cnt = i = 0; i < len; cnt++)
+		i = find_next_zero_bit(bitmap, len, i) + 1;
+	cycles = get_cycles() - cycles;
+	/* cycles_t differs between architectures; widen it for printing. */
+	pr_err("find_next_zero_bit: %llu cycles, %lu iterations\n",
+	       (unsigned long long)cycles, cnt);
+
+	return 0;
+}
+
+/*
+ * Measure a backwards traversal of the set bits in @bitmap: call
+ * find_last_bit(), then shrink the search window to end just before the
+ * bit that was found, and repeat.
+ *
+ * @bitmap: bitmap to scan; not modified.
+ * @len: number of valid bits in @bitmap.
+ *
+ * Logs the get_cycles() delta and the number of find_last_bit() calls.
+ * Always returns 0.
+ */
+static int __init test_find_last_bit(const void *bitmap, unsigned long len)
+{
+	unsigned long l, cnt = 0;
+	cycles_t cycles;
+
+	cycles = get_cycles();
+	do {
+		cnt++;
+		l = find_last_bit(bitmap, len);
+		/* Nothing set in the remaining window: traversal is done. */
+		if (l >= len)
+			break;
+		/* Exclude the found bit and everything above it. */
+		len = l;
+	} while (len);
+	cycles = get_cycles() - cycles;
+	/* cycles_t differs between architectures; widen it for printing. */
+	pr_err("find_last_bit: %llu cycles, %lu iterations\n",
+	       (unsigned long long)cycles, cnt);
+
+	return 0;
+}
+
+/*
+ * Module entry point. Runs the four benchmarks twice: first against a
+ * bitmap filled with random bytes, then against a zeroed bitmap with
+ * BITMAP_LEN / SPARSE randomly chosen bits set. test_find_first_bit()
+ * goes last in each pass because it clears the bits it finds.
+ *
+ * Always returns 0.
+ */
+static int __init find_bit_test(void)
+{
+	unsigned long remaining;
+
+	/* Pass 1: dense, random contents. */
+	pr_err("Start testing find_bit() with random-filled bitmap\n");
+
+	get_random_bytes(bitmap, sizeof(bitmap));
+
+	test_find_next_bit(bitmap, BITMAP_LEN);
+	test_find_next_zero_bit(bitmap, BITMAP_LEN);
+	test_find_last_bit(bitmap, BITMAP_LEN);
+	test_find_first_bit(bitmap, BITMAP_LEN);
+
+	/* Pass 2: sparse contents. */
+	pr_err("Start testing find_bit() with sparse bitmap\n");
+
+	bitmap_zero(bitmap, BITMAP_LEN);
+
+	/* Positions may repeat, so slightly fewer bits can end up set. */
+	for (remaining = BITMAP_LEN / SPARSE; remaining > 0; remaining--)
+		__set_bit(prandom_u32() % BITMAP_LEN, bitmap);
+
+	test_find_next_bit(bitmap, BITMAP_LEN);
+	test_find_next_zero_bit(bitmap, BITMAP_LEN);
+	test_find_last_bit(bitmap, BITMAP_LEN);
+	test_find_first_bit(bitmap, BITMAP_LEN);
+
+	return 0;
+}
+module_init(find_bit_test);
+
+static void __exit test_find_bit_cleanup(void)	/* module exit hook */
+{	/* intentionally empty: find_bit_test() acquires nothing to release */
+}
+module_exit(test_find_bit_cleanup);
+
+MODULE_LICENSE("GPL");
diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c
index 42c15f906aac..6873b5fa221b 100644
--- a/tools/lib/find_bit.c
+++ b/tools/lib/find_bit.c
@@ -29,7 +29,7 @@
  * find_next_zero_bit.  The difference is the "invert" argument, which
  * is XORed with each fetched word before searching it for one bits.
  */
-static unsigned long _find_next_bit(const unsigned long *addr,
+static inline unsigned long _find_next_bit(const unsigned long *addr,
 		unsigned long nbits, unsigned long start, unsigned long invert)
 {
 	unsigned long tmp;
-- 
2.11.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ