lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Mon, 02 Feb 2015 11:43:49 +0100
From:	Rasmus Villemoes <linux@...musvillemoes.dk>
To:	yury.norov@...il.com
Cc:	klimov.linux@...il.com, davem@...emloft.net,
	akpm@...ux-foundation.org, hannes@...essinduktion.org,
	dborkman@...hat.com, laijs@...fujitsu.com,
	takahiro.akashi@...aro.org, valentinrothberg@...il.com,
	linux@...izon.com, msalter@...hat.com, chris@...is-wilson.co.uk,
	tgraf@...g.ch, linux-kernel@...r.kernel.org,
	Yury Norov <y.norov@...sung.com>
Subject: Re: [PATCH v2 1/3] lib: find_*_bit reimplementation

On Sat, Jan 31 2015, yury.norov@...il.com wrote:

> From: Yury Norov <y.norov@...sung.com>
>
> New implementations takes less space in source file (see diffstat)
> and in object. For me it's 710 vs 453 bytes of text.
>

New version generally looks good. Please include a summary of the
changes between the versions either below the --- line or in a 0/n cover
letter, especially since you've now expanded the scope of the series.

Comments below.

>
> Patch was boot-tested on x86_64 and MIPS (big-endian) machines.
> Performance tests were ran on userspace with code like this:
>
> 	/* addr[] is filled from /dev/urandom */
> 	start = clock();
> 	while (ret < nbits)
> 		ret = find_next_bit(addr, nbits, ret + 1);
>
> 	end = clock();
> 	printf("%ld\t", (unsigned long) end - start);
>
> On Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz rezults are next:
> (for find_next_bit, nbits is 8M, for find_first_bit - 80K)
>
> 	find_next_bit:		find_first_bit:
> 	new	current		new	current
> 	26932	43151		14777	14925
> 	26947	43182		14521	15423
> 	26507	43824		15053	14705
> 	27329	43759		14473	14777
> 	26895	43367		14847	15023
> 	26990	43693		15103	15163
> 	26775	43299		15067	15232
> 	27282	42752		14544	15121
> 	27504	43088		14644	14858
> 	26761	43856		14699	15193
> 	26692	43075		14781	14681
> 	27137	42969		14451	15061
> 	...			...
>
> find_next_bit performance gain is 35-40%;
> find_first_bit - no measurable difference.
>
> Signed-off-by: Yury Norov <yury.norov@...il.com>
> ---
>  lib/find_last_bit.c |  31 ++-----
>  lib/find_next_bit.c | 254 +++++++++++++++-------------------------------------
>  2 files changed, 79 insertions(+), 206 deletions(-)
>
> diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c
> index 91ca09f..e67e970 100644
> --- a/lib/find_last_bit.c
> +++ b/lib/find_last_bit.c
> @@ -4,44 +4,29 @@
>   * Written by Rusty Russell <rusty@...tcorp.com.au>
>   * (Inspired by David Howell's find_next_bit implementation)
>   *
> + * Rewritten by Yury Norov <yury.norov@...il.com> to decrease
> + * size and improve performance, 2015.
> + *
>   * This program is free software; you can redistribute it and/or
>   * modify it under the terms of the GNU General Public License
>   * as published by the Free Software Foundation; either version
>   * 2 of the License, or (at your option) any later version.
>   */
>  
> -#include <linux/bitops.h>

Why do you remove that #include? It is rather important that the header
and implementation don't get out of sync. I know that kernel.h includes
bitops.h, but please don't rely on such things. Quoting SubmitChecklist:

1: If you use a facility then #include the file that defines/declares
   that facility.  Don't depend on other header files pulling in ones
   that you use.


>  #include <linux/export.h>
> -#include <asm/types.h>
> -#include <asm/byteorder.h>

However, getting rid of includes that are no longer needed is certainly
a good thing.

> +#include <linux/kernel.h>
>  
>  #ifndef find_last_bit
>  
>  unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
>  {
> -	unsigned long words;
> -	unsigned long tmp;
> -
> -	/* Start at final word. */
> -	words = size / BITS_PER_LONG;
> -
> -	/* Partial final word? */
> -	if (size & (BITS_PER_LONG-1)) {
> -		tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
> -					 - (size & (BITS_PER_LONG-1)))));
> -		if (tmp)
> -			goto found;
> -	}
> +	unsigned long idx = DIV_ROUND_UP(size, BITS_PER_LONG);
>  
> -	while (words) {
> -		tmp = addr[--words];
> -		if (tmp) {
> -found:
> -			return words * BITS_PER_LONG + __fls(tmp);
> -		}
> +	while (idx--) {
> +		if (addr[idx])
> +			return min(idx * BITS_PER_LONG + __fls(addr[idx]), size);
>  	}
>  
> -	/* Not found */
>  	return size;
>  }
>  EXPORT_SYMBOL(find_last_bit);
> diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
> index 0cbfc0b..ebfb3dc 100644
> --- a/lib/find_next_bit.c
> +++ b/lib/find_next_bit.c
> @@ -3,18 +3,45 @@
>   * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
>   * Written by David Howells (dhowells@...hat.com)
>   *
> + * Rewritten by Yury Norov <yury.norov@...il.com> to decrease
> + * size and improve performance, 2015.
> + *
>   * This program is free software; you can redistribute it and/or
>   * modify it under the terms of the GNU General Public License
>   * as published by the Free Software Foundation; either version
>   * 2 of the License, or (at your option) any later version.
>   */
>  
> -#include <linux/bitops.h>
>  #include <linux/export.h>
> -#include <asm/types.h>
> -#include <asm/byteorder.h>
> +#include <linux/kernel.h>

Same as above.

> +#define HIGH_BITS_MASK(nr)		(ULONG_MAX << (nr))
> +
> +#if !defined(find_next_bit) || !defined(find_next_zero_bit)
> +static unsigned long _find_next_bit(const unsigned long *addr,
> +		unsigned long nbits, unsigned long start, bool set)
> +{
> +	unsigned long tmp = set ? addr[start / BITS_PER_LONG]
> +			: ~addr[start / BITS_PER_LONG];
> +
> +	/* Handle 1st word. */
> +	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
> +		tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
> +		start = round_down(start, BITS_PER_LONG);
> +	}
>  
> -#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
> +	while (!tmp) {
> +		start += BITS_PER_LONG;
> +		if (start >= nbits)
> +			return nbits;
> +
> +		tmp = set ? addr[start / BITS_PER_LONG]
> +			: ~addr[start / BITS_PER_LONG];
> +	}
> +
> +	return start + __ffs(tmp);
> +}
> +#endif
>  
>  #ifndef find_next_bit
>  /*
> @@ -23,86 +50,22 @@
>  unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
>  			    unsigned long offset)
>  {
> -	const unsigned long *p = addr + BITOP_WORD(offset);
> -	unsigned long result = offset & ~(BITS_PER_LONG-1);
> -	unsigned long tmp;
> -
>  	if (offset >= size)
>  		return size;

Why can't this ...


> -	size -= result;
> -	offset %= BITS_PER_LONG;
> -	if (offset) {
> -		tmp = *(p++);
> -		tmp &= (~0UL << offset);
> -		if (size < BITS_PER_LONG)
> -			goto found_first;
> -		if (tmp)
> -			goto found_middle;
> -		size -= BITS_PER_LONG;
> -		result += BITS_PER_LONG;
> -	}
> -	while (size & ~(BITS_PER_LONG-1)) {
> -		if ((tmp = *(p++)))
> -			goto found_middle;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> -	}
> -	if (!size)
> -		return result;
> -	tmp = *p;
>  
> -found_first:
> -	tmp &= (~0UL >> (BITS_PER_LONG - size));
> -	if (tmp == 0UL)		/* Are any bits set? */
> -		return result + size;	/* Nope. */
> -found_middle:
> -	return result + __ffs(tmp);
> +	return min(_find_next_bit(addr, size, offset, 1), size);

... and this be part of _find_next_bit? Can find_next_bit not be simply
'return _find_next_bit(addr, size, offset, 1);', and similarly for
find_next_zero_bit? Btw., passing true and false for the boolean
parameter may be a little clearer.

>  }
>  EXPORT_SYMBOL(find_next_bit);
>  #endif
>  
>  #ifndef find_next_zero_bit
> -/*
> - * This implementation of find_{first,next}_zero_bit was stolen from
> - * Linus' asm-alpha/bitops.h.
> - */
>  unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
>  				 unsigned long offset)
>  {
> -	const unsigned long *p = addr + BITOP_WORD(offset);
> -	unsigned long result = offset & ~(BITS_PER_LONG-1);
> -	unsigned long tmp;
> -
>  	if (offset >= size)
>  		return size;

See above.

> -	size -= result;
> -	offset %= BITS_PER_LONG;
> -	if (offset) {
> -		tmp = *(p++);
> -		tmp |= ~0UL >> (BITS_PER_LONG - offset);
> -		if (size < BITS_PER_LONG)
> -			goto found_first;
> -		if (~tmp)
> -			goto found_middle;
> -		size -= BITS_PER_LONG;
> -		result += BITS_PER_LONG;
> -	}
> -	while (size & ~(BITS_PER_LONG-1)) {
> -		if (~(tmp = *(p++)))
> -			goto found_middle;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> -	}
> -	if (!size)
> -		return result;
> -	tmp = *p;
>  
> -found_first:
> -	tmp |= ~0UL << size;
> -	if (tmp == ~0UL)	/* Are any bits zero? */
> -		return result + size;	/* Nope. */
> -found_middle:
> -	return result + ffz(tmp);
> +	return min(_find_next_bit(addr, size, offset, 0), size);

See above.

>  }
>  EXPORT_SYMBOL(find_next_zero_bit);
>  #endif
> @@ -113,24 +76,14 @@ EXPORT_SYMBOL(find_next_zero_bit);
>   */
>  unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
>  {
> -	const unsigned long *p = addr;
> -	unsigned long result = 0;
> -	unsigned long tmp;
> +	unsigned long idx;
>  
> -	while (size & ~(BITS_PER_LONG-1)) {
> -		if ((tmp = *(p++)))
> -			goto found;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> +	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
> +		if (addr[idx])
> +			return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
>  	}
> -	if (!size)
> -		return result;
>  
> -	tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
> -	if (tmp == 0UL)		/* Are any bits set? */
> -		return result + size;	/* Nope. */
> -found:
> -	return result + __ffs(tmp);
> +	return size;
>  }
>  EXPORT_SYMBOL(find_first_bit);
>  #endif
> @@ -141,24 +94,14 @@ EXPORT_SYMBOL(find_first_bit);
>   */
>  unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
>  {
> -	const unsigned long *p = addr;
> -	unsigned long result = 0;
> -	unsigned long tmp;
> +	unsigned long idx;
>  
> -	while (size & ~(BITS_PER_LONG-1)) {
> -		if (~(tmp = *(p++)))
> -			goto found;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> +	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
> +		if (addr[idx] != ULONG_MAX)
> +			return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
>  	}
> -	if (!size)
> -		return result;
>  
> -	tmp = (*p) | (~0UL << size);
> -	if (tmp == ~0UL)	/* Are any bits zero? */
> -		return result + size;	/* Nope. */
> -found:
> -	return result + ffz(tmp);
> +	return size;
>  }
>  EXPORT_SYMBOL(find_first_zero_bit);
>  #endif
> @@ -166,18 +109,6 @@ EXPORT_SYMBOL(find_first_zero_bit);
>  #ifdef __BIG_ENDIAN
>  
>  /* include/linux/byteorder does not support "unsigned long" type */
> -static inline unsigned long ext2_swabp(const unsigned long * x)
> -{
> -#if BITS_PER_LONG == 64
> -	return (unsigned long) __swab64p((u64 *) x);
> -#elif BITS_PER_LONG == 32
> -	return (unsigned long) __swab32p((u32 *) x);
> -#else
> -#error BITS_PER_LONG not defined
> -#endif
> -}
> -
> -/* include/linux/byteorder doesn't support "unsigned long" type */
>  static inline unsigned long ext2_swab(const unsigned long y)
>  {
>  #if BITS_PER_LONG == 64
> @@ -189,48 +120,40 @@ static inline unsigned long ext2_swab(const unsigned long y)
>  #endif
>  }
>  
> +#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
> +static unsigned long _find_next_bit_le(const unsigned long *addr,
> +		unsigned long nbits, unsigned long start, bool set)
> +{
> +	unsigned long tmp = set ? addr[start / BITS_PER_LONG]
> +			: ~addr[start / BITS_PER_LONG];
> +
> +	/* Handle 1st word. */
> +	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
> +		tmp &= ext2_swab(HIGH_BITS_MASK(start % BITS_PER_LONG));
> +		start = round_down(start, BITS_PER_LONG);
> +	}
> +
> +	while (!tmp) {
> +		start += BITS_PER_LONG;
> +		if (start >= nbits)
> +			return nbits;
> +
> +		tmp = set ? addr[start / BITS_PER_LONG]
> +			: ~addr[start / BITS_PER_LONG];
> +	}
> +
> +	return start + __ffs(ext2_swab(tmp));
> +}
> +#endif
> +
>  #ifndef find_next_zero_bit_le
>  unsigned long find_next_zero_bit_le(const void *addr, unsigned
>  		long size, unsigned long offset)
>  {
> -	const unsigned long *p = addr;
> -	unsigned long result = offset & ~(BITS_PER_LONG - 1);
> -	unsigned long tmp;
> -
>  	if (offset >= size)
>  		return size;

Again, I think this should be moved to the common implementation in
_find_next_bit_le, and similarly for find_next_bit_le below.

> -	p += BITOP_WORD(offset);
> -	size -= result;
> -	offset &= (BITS_PER_LONG - 1UL);
> -	if (offset) {
> -		tmp = ext2_swabp(p++);
> -		tmp |= (~0UL >> (BITS_PER_LONG - offset));
> -		if (size < BITS_PER_LONG)
> -			goto found_first;
> -		if (~tmp)
> -			goto found_middle;
> -		size -= BITS_PER_LONG;
> -		result += BITS_PER_LONG;
> -	}
>  
> -	while (size & ~(BITS_PER_LONG - 1)) {
> -		if (~(tmp = *(p++)))
> -			goto found_middle_swap;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> -	}
> -	if (!size)
> -		return result;
> -	tmp = ext2_swabp(p);
> -found_first:
> -	tmp |= ~0UL << size;
> -	if (tmp == ~0UL)	/* Are any bits zero? */
> -		return result + size; /* Nope. Skip ffz */
> -found_middle:
> -	return result + ffz(tmp);
> -
> -found_middle_swap:
> -	return result + ffz(ext2_swab(tmp));
> +	return min(_find_next_bit_le(addr, size, offset, 0), size);
>  }
>  EXPORT_SYMBOL(find_next_zero_bit_le);
>  #endif
> @@ -239,45 +162,10 @@ EXPORT_SYMBOL(find_next_zero_bit_le);
>  unsigned long find_next_bit_le(const void *addr, unsigned
>  		long size, unsigned long offset)
>  {
> -	const unsigned long *p = addr;
> -	unsigned long result = offset & ~(BITS_PER_LONG - 1);
> -	unsigned long tmp;
> -
>  	if (offset >= size)
>  		return size;
> -	p += BITOP_WORD(offset);
> -	size -= result;
> -	offset &= (BITS_PER_LONG - 1UL);
> -	if (offset) {
> -		tmp = ext2_swabp(p++);
> -		tmp &= (~0UL << offset);
> -		if (size < BITS_PER_LONG)
> -			goto found_first;
> -		if (tmp)
> -			goto found_middle;
> -		size -= BITS_PER_LONG;
> -		result += BITS_PER_LONG;
> -	}
> -
> -	while (size & ~(BITS_PER_LONG - 1)) {
> -		tmp = *(p++);
> -		if (tmp)
> -			goto found_middle_swap;
> -		result += BITS_PER_LONG;
> -		size -= BITS_PER_LONG;
> -	}
> -	if (!size)
> -		return result;
> -	tmp = ext2_swabp(p);
> -found_first:
> -	tmp &= (~0UL >> (BITS_PER_LONG - size));
> -	if (tmp == 0UL)		/* Are any bits set? */
> -		return result + size; /* Nope. */
> -found_middle:
> -	return result + __ffs(tmp);
>  
> -found_middle_swap:
> -	return result + __ffs(ext2_swab(tmp));
> +	return min(_find_next_bit_le(addr, size, offset, 1), size);
>  }
>  EXPORT_SYMBOL(find_next_bit_le);
>  #endif
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ