lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1430119606-25012-1-git-send-email-daniel.wagner@bmw-carit.de>
Date:	Mon, 27 Apr 2015 09:26:46 +0200
From:	Daniel Wagner <daniel.wagner@...-carit.de>
To:	Andrew Morton <akpm@...ux-foundation.org>,
	Rasmus Villemoes <linux@...musvillemoes.dk>
Cc:	Daniel Wagner <daniel.wagner@...-carit.de>,
	linux-kernel@...r.kernel.org
Subject: [PATCH] lib/sort: Add 64 bit swap function

In case the call side is not providing a swap function, we either use
a 32 bit or a generic swap function.  When swapping around pointers on
64 bit architectures falling back to use the generic swap function
seems like an unnecessary waste.

There at least 9 users ('sort' is of difficult to grep for) of sort()
and all of them use the sort function without a customized swap
function. Furthermore, they are all using pointers to swap around:

arch/x86/kernel/e820.c:sanitize_e820_map()
arch/x86/mm/extable.c:sort_extable()
drivers/acpi/fan.c:acpi_fan_get_fps()
fs/btrfs/super.c:btrfs_descending_sort_devices()
fs/xfs/libxfs/xfs_dir2_block.c:xfs_dir2_sf_to_block()
kernel/range.c:clean_sort_range()
mm/memcontrol.c:__mem_cgroup_usage_register_event()
sound/pci/hda/hda_auto_parser.c:snd_hda_parse_pin_defcfg()
sound/pci/hda/hda_auto_parser.c:sort_pins_by_sequence()

Obviously, we could improve the swap for other sizes as well
but this is overkill at this point.

A simple test shows sorting a 400 element array (try to stay in one
page) with either with u32_swap() or u64_swap() show that the theory
actually works. This test was done on a x86_64 (Intel Xeon E5-4610)
machine.

$ cat u32_swap.old | histogram.py
 NumSamples = 99; Min = 26.00; Max = 37.00
 Mean = 28.494949; Variance = 1.542904; SD = 1.242137; Median 28.000000
 each ∎ represents a count of 1
   26.0000 -    27.1000 [    13]: ∎∎∎∎∎∎∎∎∎∎∎∎∎
   27.1000 -    28.2000 [    44]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎
   28.2000 -    29.3000 [    28]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎
   29.3000 -    30.4000 [    13]: ∎∎∎∎∎∎∎∎∎∎∎∎∎
   30.4000 -    31.5000 [     0]:
   31.5000 -    32.6000 [     0]:
   32.6000 -    33.7000 [     0]:
   33.7000 -    34.8000 [     0]:
   34.8000 -    35.9000 [     0]:
   35.9000 -    37.0000 [     1]: ∎

$ cat u32_swap.new | histogram.py
 NumSamples = 99; Min = 26.00; Max = 28.00
 Mean = 27.030303; Variance = 0.191001; SD = 0.437037; Median 27.000000
 each ∎ represents a count of 1
   26.0000 -    26.2000 [     8]: ∎∎∎∎∎∎∎∎
   26.2000 -    26.4000 [     0]:
   26.4000 -    26.6000 [     0]:
   26.6000 -    26.8000 [     0]:
   26.8000 -    27.0000 [    80]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎
   27.0000 -    27.2000 [     0]:
   27.2000 -    27.4000 [     0]:
   27.4000 -    27.6000 [     0]:
   27.6000 -    27.8000 [     0]:
   27.8000 -    28.0000 [    11]: ∎∎∎∎∎∎∎∎∎∎∎

$ cat u64_swap.old | histogram.py
 NumSamples = 99; Min = 43.00; Max = 45.00
 Mean = 44.060606; Variance = 0.137741; SD = 0.371135; Median 44.000000
 each ∎ represents a count of 1
   43.0000 -    43.2000 [     4]: ∎∎∎∎
   43.2000 -    43.4000 [     0]:
   43.4000 -    43.6000 [     0]:
   43.6000 -    43.8000 [     0]:
   43.8000 -    44.0000 [    85]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎
   44.0000 -    44.2000 [     0]:
   44.2000 -    44.4000 [     0]:
   44.4000 -    44.6000 [     0]:
   44.6000 -    44.8000 [     0]:
   44.8000 -    45.0000 [    10]: ∎∎∎∎∎∎∎∎∎∎

$ cat u64_swap.new | histogram.py
 NumSamples = 99; Min = 26.00; Max = 32.00
 Mean = 26.939394; Variance = 0.400367; SD = 0.632746; Median 27.000000
 each ∎ represents a count of 1
   26.0000 -    26.6000 [    13]: ∎∎∎∎∎∎∎∎∎∎∎∎∎
   26.6000 -    27.2000 [    83]: ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎
   27.2000 -    27.8000 [     0]:
   27.8000 -    28.4000 [     2]: ∎∎
   28.4000 -    29.0000 [     0]:
   29.0000 -    29.6000 [     0]:
   29.6000 -    30.2000 [     0]:
   30.2000 -    30.8000 [     0]:
   30.8000 -    31.4000 [     0]:
   31.4000 -    32.0000 [     1]: ∎

Signed-off-by: Daniel Wagner <daniel.wagner@...-carit.de>
Cc: Rasmus Villemoes <linux@...musvillemoes.dk>
Cc: Andrew Morton <akpm@...ux-foundation.org>
Cc: linux-kernel@...r.kernel.org
---
#include <linux/sort.h>
#include <linux/random.h>
#include <linux/ktime.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/module.h>

#define ARRAY_ELEMENTS 400

static int cmp_32(const void *a, const void *b)
{
	return (int) (*(u32 *)a - *(u32 *)b);
}

static int cmp_64(const void *a, const void *b)
{
	return (int) (*(u64 *)a - *(u64 *)b);
}

static void init_array32(void *array)
{
	u32 *a = array;
	int i;

	a[0] = 3821;
	for (i = 1; i < ARRAY_ELEMENTS; i++)
		a[i] = next_pseudo_random32(a[i-1]);
}

static void init_array64(void *array)
{
	u64 *a = array;
	int i;

	a[0] = 3821;
	for (i = 1; i < ARRAY_ELEMENTS; i++)
		a[i] = next_pseudo_random32(a[i-1]);
}

static void sort_test(size_t size, void *array)
{
	int (*cmp) (const void *, const void *);
	void (*init)(void *array);
	ktime_t start, stop;
	int i;

	if (size == 4) {
		init = init_array32;
		cmp = cmp_32;
	} else if (size == 8) {
		init = init_array64;
		cmp = cmp_64;
	} else
		return;

	for (i = 0; i < 10000; i++) {
		init(array);

		local_irq_disable();
		start = ktime_get();

		sort(array, ARRAY_ELEMENTS, size, cmp, NULL);

		stop = ktime_get();
		local_irq_enable();

		if (i > 10000 - 100)
		  pr_info("%lld\n",  ktime_to_us(ktime_sub(stop, start)));
	}
}

static void *create_array(size_t size)
{
	void *array;

	array = kmalloc(ARRAY_ELEMENTS * size, GFP_KERNEL);
	if (!array)
		return NULL;

	return array;
}

static int perform_test(size_t size)
{
	void *array;

	array = create_array(size);
	if (!array)
		return -ENOMEM;

	pr_info("test u%d_swap\n", (int)size * 8);
	sort_test(size, array);

	kfree(array);

	return 0;
}

static int __init sort_tests_init(void)
{
	int err;

	err = perform_test(sizeof(u32));
	if (err)
		return err;

	err = perform_test(sizeof(u64));
	if (err)
		return err;

	return 0;
}

static void __exit sort_tests_exit(void)
{
}

module_init(sort_tests_init);
module_exit(sort_tests_exit);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Daniel Wagner");
MODULE_DESCRIPTION("sort perfomance tests");


lib/sort.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/lib/sort.c b/lib/sort.c
index 43c9fe7..86e3efe 100644
--- a/lib/sort.c
+++ b/lib/sort.c
@@ -15,6 +15,13 @@ static void u32_swap(void *a, void *b, int size)
 	*(u32 *)b = t;
 }
 
+static void u64_swap(void *a, void *b, int size)
+{
+	u64 t = *(u64 *)a;
+	*(u64 *)a = *(u64 *)b;
+	*(u64 *)b = t;
+}
+
 static void generic_swap(void *a, void *b, int size)
 {
 	char t;
@@ -50,8 +57,18 @@ void sort(void *base, size_t num, size_t size,
 	/* pre-scale counters for performance */
 	int i = (num/2 - 1) * size, n = num * size, c, r;
 
-	if (!swap_func)
-		swap_func = (size == 4 ? u32_swap : generic_swap);
+	if (!swap_func) {
+		switch (size) {
+		case 4:
+			swap_func = u32_swap;
+			break;
+		case 8:
+			swap_func = u64_swap;
+			break;
+		default:
+			swap_func = generic_swap;
+		}
+	}
 
 	/* heapify */
 	for ( ; i >= 0; i -= size) {
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ