lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <54C57D28.60501@gmail.com>
Date:	Mon, 26 Jan 2015 02:32:56 +0300
From:	Yury <yury.norov@...il.com>
To:	Rasmus Villemoes <linux@...musvillemoes.dk>
CC:	linux-kernel@...r.kernel.org, heukelum@...lshack.com,
	mita@...aclelinux.com, Andrew Morton <akpm@...ux-foundation.org>,
	rusty@...tcorp.com.au
Subject: Re: [PATCH] lib: find_*_bit reimplementation

On 24.01.2015 03:45, Rasmus Villemoes wrote:
> On Mon, Jan 19 2015, Yury Norov <yury.norov@...il.com> wrote:
>
>> New implementation takes less space, and, I hope, easier to understand.
>>
>> Signed-off-by: Yury Norov <yury.norov@...il.com>
>> ---
>>  lib/find_next_bit.c | 265 +++++++++++++++-------------------------------------
>>  1 file changed, 73 insertions(+), 192 deletions(-)
>>
> That diffstat is certainly nice. Do you also have numbers for the
> size of the generated code, and do you know if there is a
> measurable performance difference? Have you tested whether the new and
> old code give the same results, also in corner cases?
Hello, Rasmus. Thank you for your time.

> Do you also have numbers for the size of the generated code

Before text section is 817 bytes, after - 533.
Comparing to this version, I have the patch modified now, and numbers may little differ.

> Have you tested whether the new and old code give the same results, also in corner cases?

I tested new version together with old one, and if new did return different value,
it was printed to system log. I didn't measure performance because I don't expect
significant gain here. But now I think it's good idea to write tests for performance and
corner cases. Maybe someone already did it... Brief googling didn't help. Anyway, with
new version of patch I will show my measures.
>
> Some remarks inline below.
>
>> diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
>> index 0cbfc0b..a5c915f 100644
>> --- a/lib/find_next_bit.c
>> +++ b/lib/find_next_bit.c
>> @@ -11,10 +11,39 @@
>>  
>>  #include <linux/bitops.h>
>>  #include <linux/export.h>
>> +#include <linux/kernel.h>
>>  #include <asm/types.h>
>>  #include <asm/byteorder.h>
>>  
>> -#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
>> +#define HIGH_BITS_MASK(nr)		(ULONG_MAX << (nr))
>> +#define MIN(a, b)			((a) < (b) ? (a) : (b))
>> +
> Please don't duplicate min/max macros. kernel.h already provides everything you need.
Ok.
>
>> +#if !defined(find_next_bit) || !defined(find_next_zero_bit)
>> +static unsigned long _find_next_bit(const unsigned long *addr,
>> +		unsigned long end, unsigned long start, unsigned long flags)
> Having two parameters called end and start appearing in that
> order is slightly confusing. Why not keep the name 'size' for
> end, or maybe 'nbits' to make the unit clear. Also, I think flags
> should just be a bool and maybe renamed to something more meaningful.
You're right, Something like this:
 static unsigned long _find_next_bit(const unsigned long *addr,
                        unsigned long nbits, unsigned long start_bit, bool set)
looks better.
>
>> +{
>> +	unsigned long tmp = flags ? addr[start / BITS_PER_LONG]
>> +			: ~addr[start / BITS_PER_LONG];
>> +
>> +	/* Handle 1st word. */
>> +	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
>> +		tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
>> +		start = round_down(start, BITS_PER_LONG);
>> +	}
>> +
>> +	do {
>> +		if (tmp)
>> +			return MIN(start + __ffs(tmp), end);
>> +
>> +		start += BITS_PER_LONG;
>> +		if (start >= end)
>> +			return end;
>> +
>> +		tmp = flags ? addr[start / BITS_PER_LONG]
>> +			: ~addr[start / BITS_PER_LONG];
>> +	} while (1);
>> +}
>> +#endif
>>  
>>  #ifndef find_next_bit
>>  /*
>> @@ -23,86 +52,16 @@
>>  unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
>>  			    unsigned long offset)
>>  {
>> -	const unsigned long *p = addr + BITOP_WORD(offset);
>> -	unsigned long result = offset & ~(BITS_PER_LONG-1);
>> -	unsigned long tmp;
>> -
>> -	if (offset >= size)
>> -		return size;
> The previous versions handled this, but your code will always access
> the word at addr[start/BITS_PER_LONG]. Are you sure no caller ever
> passes start >= size?
You're right. Fixed.
>
>> -	size -= result;
>> -	offset %= BITS_PER_LONG;
>> -	if (offset) {
>> -		tmp = *(p++);
>> -		tmp &= (~0UL << offset);
>> -		if (size < BITS_PER_LONG)
>> -			goto found_first;
>> -		if (tmp)
>> -			goto found_middle;
>> -		size -= BITS_PER_LONG;
>> -		result += BITS_PER_LONG;
>> -	}
>> -	while (size & ~(BITS_PER_LONG-1)) {
>> -		if ((tmp = *(p++)))
>> -			goto found_middle;
>> -		result += BITS_PER_LONG;
>> -		size -= BITS_PER_LONG;
>> -	}
>> -	if (!size)
>> -		return result;
>> -	tmp = *p;
>> -
>> -found_first:
>> -	tmp &= (~0UL >> (BITS_PER_LONG - size));
>> -	if (tmp == 0UL)		/* Are any bits set? */
>> -		return result + size;	/* Nope. */
>> -found_middle:
>> -	return result + __ffs(tmp);
>> +	return _find_next_bit(addr, size, offset, 1);
>>  }
>>  EXPORT_SYMBOL(find_next_bit);
>>  #endif
>>  
>>  #ifndef find_next_zero_bit
>> -/*
>> - * This implementation of find_{first,next}_zero_bit was stolen from
>> - * Linus' asm-alpha/bitops.h.
>> - */
>>  unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
>>  				 unsigned long offset)
>>  {
>> -	const unsigned long *p = addr + BITOP_WORD(offset);
>> -	unsigned long result = offset & ~(BITS_PER_LONG-1);
>> -	unsigned long tmp;
>> -
>> -	if (offset >= size)
>> -		return size;
>> -	size -= result;
>> -	offset %= BITS_PER_LONG;
>> -	if (offset) {
>> -		tmp = *(p++);
>> -		tmp |= ~0UL >> (BITS_PER_LONG - offset);
>> -		if (size < BITS_PER_LONG)
>> -			goto found_first;
>> -		if (~tmp)
>> -			goto found_middle;
>> -		size -= BITS_PER_LONG;
>> -		result += BITS_PER_LONG;
>> -	}
>> -	while (size & ~(BITS_PER_LONG-1)) {
>> -		if (~(tmp = *(p++)))
>> -			goto found_middle;
>> -		result += BITS_PER_LONG;
>> -		size -= BITS_PER_LONG;
>> -	}
>> -	if (!size)
>> -		return result;
>> -	tmp = *p;
>> -
>> -found_first:
>> -	tmp |= ~0UL << size;
>> -	if (tmp == ~0UL)	/* Are any bits zero? */
>> -		return result + size;	/* Nope. */
>> -found_middle:
>> -	return result + ffz(tmp);
>> +	return _find_next_bit(addr, size, offset, 0);
>>  }
>>  EXPORT_SYMBOL(find_next_zero_bit);
>>  #endif
>> @@ -113,24 +72,14 @@ EXPORT_SYMBOL(find_next_zero_bit);
>>   */
>>  unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
>>  {
>> -	const unsigned long *p = addr;
>> -	unsigned long result = 0;
>> -	unsigned long tmp;
>> +	unsigned int idx;
>>  
>> -	while (size & ~(BITS_PER_LONG-1)) {
>> -		if ((tmp = *(p++)))
>> -			goto found;
>> -		result += BITS_PER_LONG;
>> -		size -= BITS_PER_LONG;
>> +	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
>> +		if (addr[idx])
>> +			return idx * BITS_PER_LONG + __ffs(addr[idx]);
>>  	}
> I'm afraid this is wrong. It doesn't take garbage bits in the last
> (partial) word into account. You either need to loop over the full
> words and handle the last separately, masking off the garbage bits, or
> wrap min(, size) around the above expression, just as you've done in
> _find_next_bit. I think I prefer the latter.
Yes. Done.
>
> Does anyone use insane size bitmaps with 2^32 bits or more? If so,
> you'd have to watch out for overflow and use unsigned long for idx.
Yes.
>
>> -	if (!size)
>> -		return result;
>>  
>> -	tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
>> -	if (tmp == 0UL)		/* Are any bits set? */
>> -		return result + size;	/* Nope. */
>> -found:
>> -	return result + __ffs(tmp);
>> +	return size;
>>  }
>>  EXPORT_SYMBOL(find_first_bit);
>>  #endif
>> @@ -141,24 +90,14 @@ EXPORT_SYMBOL(find_first_bit);
>>   */
>>  unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
>>  {
>> -	const unsigned long *p = addr;
>> -	unsigned long result = 0;
>> -	unsigned long tmp;
>> +	unsigned int idx;
>>  
>> -	while (size & ~(BITS_PER_LONG-1)) {
>> -		if (~(tmp = *(p++)))
>> -			goto found;
>> -		result += BITS_PER_LONG;
>> -		size -= BITS_PER_LONG;
>> +	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
>> +		if (addr[idx] != ULONG_MAX)
>> +			return idx * BITS_PER_LONG + ffz(addr[idx]);
>>  	}
> The same of course applies here.
>
>> -	if (!size)
>> -		return result;
>>  
>> -	tmp = (*p) | (~0UL << size);
>> -	if (tmp == ~0UL)	/* Are any bits zero? */
>> -		return result + size;	/* Nope. */
>> -found:
>> -	return result + ffz(tmp);
>> +	return size;
>>  }
>>  EXPORT_SYMBOL(find_first_zero_bit);
>>  #endif
>> @@ -166,18 +105,6 @@ EXPORT_SYMBOL(find_first_zero_bit);
>>  #ifdef __BIG_ENDIAN
>>  
>>  /* include/linux/byteorder does not support "unsigned long" type */
>> -static inline unsigned long ext2_swabp(const unsigned long * x)
>> -{
>> -#if BITS_PER_LONG == 64
>> -	return (unsigned long) __swab64p((u64 *) x);
>> -#elif BITS_PER_LONG == 32
>> -	return (unsigned long) __swab32p((u32 *) x);
>> -#else
>> -#error BITS_PER_LONG not defined
>> -#endif
>> -}
>> -
>> -/* include/linux/byteorder doesn't support "unsigned long" type */
>>  static inline unsigned long ext2_swab(const unsigned long y)
>>  {
>>  #if BITS_PER_LONG == 64
>> @@ -189,48 +116,40 @@ static inline unsigned long ext2_swab(const unsigned long y)
>>  #endif
>>  }
>>  
>> -#ifndef find_next_zero_bit_le
>> -unsigned long find_next_zero_bit_le(const void *addr, unsigned
>> -		long size, unsigned long offset)
>> +#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
>> +static unsigned long _find_next_bit_le(const unsigned long *addr,
>> +		unsigned long end, unsigned long start, unsigned long flags)
>>  {
>> -	const unsigned long *p = addr;
>> -	unsigned long result = offset & ~(BITS_PER_LONG - 1);
>> -	unsigned long tmp;
>> -
>> -	if (offset >= size)
>> -		return size;
>> -	p += BITOP_WORD(offset);
>> -	size -= result;
>> -	offset &= (BITS_PER_LONG - 1UL);
>> -	if (offset) {
>> -		tmp = ext2_swabp(p++);
>> -		tmp |= (~0UL >> (BITS_PER_LONG - offset));
>> -		if (size < BITS_PER_LONG)
>> -			goto found_first;
>> -		if (~tmp)
>> -			goto found_middle;
>> -		size -= BITS_PER_LONG;
>> -		result += BITS_PER_LONG;
>> +	unsigned long tmp = flags ? addr[start / BITS_PER_LONG]
>> +			: ~addr[start / BITS_PER_LONG];
>> +
>> +	/* Handle 1st word. */
>> +	if (!IS_ALIGNED(start, BITS_PER_LONG)) {
>> +		tmp &= ext2_swab(HIGH_BITS_MASK(start
>> +					% BITS_PER_LONG));
> Nit: There's no reason to break that line.
>
>> +		start = round_down(start, BITS_PER_LONG);
>>  	}
>>  
>> -	while (size & ~(BITS_PER_LONG - 1)) {
>> -		if (~(tmp = *(p++)))
>> -			goto found_middle_swap;
>> -		result += BITS_PER_LONG;
>> -		size -= BITS_PER_LONG;
>> -	}
>> -	if (!size)
>> -		return result;
>> -	tmp = ext2_swabp(p);
>> -found_first:
>> -	tmp |= ~0UL << size;
>> -	if (tmp == ~0UL)	/* Are any bits zero? */
>> -		return result + size; /* Nope. Skip ffz */
>> -found_middle:
>> -	return result + ffz(tmp);
>> +	do {
>> +		if (tmp)
>> +			return MIN(start +
>> +				__ffs(ext2_swab(tmp)), end);
> Again, no reason to break the line.
The reason is limitation of 80 bytes per line, but anyway, I did it bad way
>   
>> -found_middle_swap:
>> -	return result + ffz(ext2_swab(tmp));
>> +		start += BITS_PER_LONG;
>> +		if (start >= end)
>> +			return end;
>> +
>> +		tmp = flags ? addr[start / BITS_PER_LONG]
>> +			: ~addr[start / BITS_PER_LONG];
>> +	} while (1);
>> +}
>> +#endif
>> +
>> +#ifndef find_next_zero_bit_le
>> +unsigned long find_next_zero_bit_le(const void *addr, unsigned
>> +		long size, unsigned long offset)
>> +{
>> +	return _find_next_bit_le(addr, size, offset, o);
> I'm assuming you only compile-tested this on little-endian. The o
> wants to be a 0.
Yes, shame on me. I think I'd bring up SPARK or big-endian ARM environment
on my QEMU before uploading new version.
>
> Rasmus
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ