[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20150317215638.GA16776@gondor.apana.org.au>
Date: Wed, 18 Mar 2015 08:56:38 +1100
From: Herbert Xu <herbert@...dor.apana.org.au>
To: "'tgraf@...g.ch'" <tgraf@...g.ch>
Cc: David Laight <David.Laight@...LAB.COM>,
David Miller <davem@...emloft.net>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>,
Eric Dumazet <eric.dumazet@...il.com>
Subject: Re: [v1 PATCH 1/14] rhashtable: Remove shift from bucket_table
On Tue, Mar 17, 2015 at 12:40:12PM +0000, 'tgraf@...g.ch' wrote:
>
> > Agreed. Thomas, an easy test to do is to pump the integers from
> > 0 to 65535 into jhash, then mask it with 65535 and run sort |
> > uniq -c | sort -n on it, I think you'll see what we're talking
> > about.
>
> I'm not claiming perfect hash functions and this is exactly why I
> think average utilization is not an optimal growth criteria because
> it gives very limited view into the actual chain lengths.
>
> What you describe above is a 100% utilization scenario. Initially
Actually 75% is just as bad. To test that just repeat my script
above and add head -n $((65536 / 4 * 3)) before the first sort.
My point is that to get below anything like a chain length limit
of 2 (or even 4), your hash table is going to be mostly empty.
OK 0.1% might be an exaggeration but it's certainly nowhere near 50%
as your hash table size grows.
Please actually try out my test, here is a C program that will help
you do it:
#include <stdio.h>
typedef unsigned char u8;
typedef unsigned int u32;
static inline u32 rol32(u32 word, unsigned int shift)
{
return (word << shift) | (word >> (32 - shift));
}
/* Best hash sizes are of power of two */
#define jhash_size(n) ((u32)1<<(n))
/* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */
#define jhash_mask(n) (jhash_size(n)-1)
/* __jhash_mix -- mix 3 32-bit values reversibly. */
#define __jhash_mix(a, b, c) \
{ \
a -= c; a ^= rol32(c, 4); c += b; \
b -= a; b ^= rol32(a, 6); a += c; \
c -= b; c ^= rol32(b, 8); b += a; \
a -= c; a ^= rol32(c, 16); c += b; \
b -= a; b ^= rol32(a, 19); a += c; \
c -= b; c ^= rol32(b, 4); b += a; \
}
/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
#define __jhash_final(a, b, c) \
{ \
c ^= b; c -= rol32(b, 14); \
a ^= c; a -= rol32(c, 11); \
b ^= a; b -= rol32(a, 25); \
c ^= b; c -= rol32(b, 16); \
a ^= c; a -= rol32(c, 4); \
b ^= a; b -= rol32(a, 14); \
c ^= b; c -= rol32(b, 24); \
}
#define __get_unaligned_cpu32(x) (*(u32 *)(x))
/* An arbitrary initial parameter */
#define JHASH_INITVAL 0xdeadbeef
/* jhash - hash an arbitrary key
* @k: sequence of bytes as key
* @length: the length of the key
* @initval: the previous hash, or an arbitray value
*
* The generic version, hashes an arbitrary sequence of bytes.
* No alignment or length assumptions are made about the input key.
*
* Returns the hash value of the key. The result depends on endianness.
*/
static inline u32 jhash(const void *key, u32 length, u32 initval)
{
u32 a, b, c;
const u8 *k = key;
/* Set up the internal state */
a = b = c = JHASH_INITVAL + length + initval;
/* All but the last block: affect some 32 bits of (a,b,c) */
while (length > 12) {
a += __get_unaligned_cpu32(k);
b += __get_unaligned_cpu32(k + 4);
c += __get_unaligned_cpu32(k + 8);
__jhash_mix(a, b, c);
length -= 12;
k += 12;
}
/* Last block: affect all 32 bits of (c) */
/* All the case statements fall through */
switch (length) {
case 12: c += (u32)k[11]<<24;
case 11: c += (u32)k[10]<<16;
case 10: c += (u32)k[9]<<8;
case 9: c += k[8];
case 8: b += (u32)k[7]<<24;
case 7: b += (u32)k[6]<<16;
case 6: b += (u32)k[5]<<8;
case 5: b += k[4];
case 4: a += (u32)k[3]<<24;
case 3: a += (u32)k[2]<<16;
case 2: a += (u32)k[1]<<8;
case 1: a += k[0];
__jhash_final(a, b, c);
case 0: /* Nothing left to add */
break;
}
return c;
}
/* jhash2 - hash an array of u32's
* @k: the key which must be an array of u32's
* @length: the number of u32's in the key
* @initval: the previous hash, or an arbitray value
*
* Returns the hash value of the key.
*/
static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
{
u32 a, b, c;
/* Set up the internal state */
a = b = c = JHASH_INITVAL + (length<<2) + initval;
/* Handle most of the key */
while (length > 3) {
a += k[0];
b += k[1];
c += k[2];
__jhash_mix(a, b, c);
length -= 3;
k += 3;
}
/* Handle the last 3 u32's: all the case statements fall through */
switch (length) {
case 3: c += k[2];
case 2: b += k[1];
case 1: a += k[0];
__jhash_final(a, b, c);
case 0: /* Nothing left to add */
break;
}
return c;
}
/* jhash_3words - hash exactly 3, 2 or 1 word(s) */
static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
{
a += JHASH_INITVAL;
b += JHASH_INITVAL;
c += initval;
__jhash_final(a, b, c);
return c;
}
static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
{
return jhash_3words(a, b, 0, initval);
}
static inline u32 jhash_1word(u32 a, u32 initval)
{
return jhash_3words(a, 0, 0, initval);
}
int main(int argc, char **argv)
{
int i;
struct {
void *s;
void *t;
u32 l;
} k = { .s = 0 };
int total;
total = atoi(argv[1]);
for (i = 0; i < total; i++) {
k.l = i;
printf("0x%x\n", jhash2((u32 *)&k, 3, 12345) & (total - 1));
}
return 0;
}
Cheers,
--
Email: Herbert Xu <herbert@...dor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists