#include #include #include #include #include #include #include #include #include #define init_name_hash() 0 /* partial hash update function. Assume roughly 4 bits per character */ static inline unsigned long partial_name_hash(unsigned long c, unsigned long prevhash) { return (prevhash + (c << 4) + (c >> 4)) * 11; } /* Compute the hash for a name string. */ static unsigned int full_name_hash(const unsigned char *name, unsigned int len) { unsigned long hash = 0; while (len--) hash = partial_name_hash(*name++, hash); return hash; } static unsigned int djb2(const unsigned char *str, unsigned int len) { unsigned long hash = 5381; while (len--) hash = ((hash << 5) + hash) + *str++; /* hash * 33 + c */ return hash; } static unsigned int string_hash31(const unsigned char *name, unsigned int len) { unsigned hash = 0; int i; for (i = 0; i < len; ++i) hash = 31 * hash + name[i]; return hash; } static unsigned int string_hash17(const unsigned char *name, unsigned int len) { unsigned hash = 0; int i; for (i = 0; i < len; ++i) hash = 17 * hash + name[i]; return hash; } static unsigned int string10(const unsigned char *name, unsigned int len) { unsigned hash = 0; int i; for (i = 0; i < len; ++i) hash = hash * 10 + name[i] - '0'; return hash; } static const uint32_t FNV_32_PRIME = 16777619u; static const uint32_t FNV1_32_INIT = 2166136261u; static unsigned int fnv32(const unsigned char *key, unsigned int len) { uint32_t hval = FNV1_32_INIT; unsigned int i; for (i = 0; i < len; i++) { hval ^= key[i]; #if defined(NO_FNV_GCC_OPTIMIZATION) hval *= FNV_32_PRIME; #else hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24); #endif } return hval; } static unsigned order = 8; static unsigned iterations = 10000; static char **names; static unsigned num_names; static void read_names(void) { char line[1024]; unsigned int n = 0; /* Read input to create name set */ while (fgets(line, sizeof line, stdin) != NULL) { names = realloc(names, (n + 1) * sizeof(char *)); names[n] = malloc(strlen(line) + 1); strcpy(names[n], line); ++n; } num_names = n; } /* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ #define GOLDEN_RATIO_PRIME_32 0x9e370001UL /* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ #define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001UL /* Unrolled version of hash_long() */ static inline unsigned int fold(unsigned int val, unsigned int bits) { if (sizeof(val) == 8) { uint64_t hash = val; /* Sigh, gcc can't optimise this alone like it does for 32 bits. */ uint64_t n = hash; n <<= 18; hash -= n; n <<= 33; hash -= n; n <<= 3; hash += n; n <<= 3; hash -= n; n <<= 4; hash += n; n <<= 2; hash += n; /* High bits are more random, so use them. */ return hash >> (64 - bits); } else { /* On some cpus multiply is faster, on others gcc will do shifts */ uint32_t hash = val * GOLDEN_RATIO_PRIME_32; /* High bits are more random, so use them. */ return hash >> (32 - bits); } } static void measure(const char *name, unsigned int (*hash)(const unsigned char *, unsigned int)) { unsigned int i; struct timeval t0, t1; unsigned int hashsz = 1 << order; unsigned int dist[hashsz]; unsigned long long probes = 0; double t; unsigned long m = 0; const double avg = num_names / hashsz; double ideal = hashsz * (avg * (1 + avg)) / 2; double std = 0; memset(dist, 0, sizeof(unsigned int) * hashsz); gettimeofday(&t0, NULL); for (i = 0; i < num_names; i++) { unsigned char *name = (unsigned char *) names[i]; size_t len = strlen(names[i]); unsigned int h = 0, j; for (j = 0; j < iterations; j++) h = hash(name, len); /* fold in extra bits */ ++dist[fold(h, order)]; } gettimeofday(&t1, NULL); t = (double) (t1.tv_sec - t0.tv_sec); t += (double) (t1.tv_usec - t0.tv_usec) / 1000000.; for (i = 0; i < hashsz; i++) { unsigned int n = dist[i]; double delta = (n - avg); if (n > m) m = n; /* longest chain */ std += delta * delta; /* compute standard deviation */ /* number of probes used when accessing is same as sum of arithmetic series */ probes += ((uint64_t) n * (1 + n)) / 2; } printf("%-20s %f", name, t); printf(" %8.2f %9lu %6.2f\n", (double) probes / ideal, m, sqrt(std / hashsz)); } #define MEASURE(func) measure(#func, &func) int main(int argc, char **argv) { int f; while ((f = getopt(argc, argv, "h:n:")) != -1) switch (f) { case 'n': iterations = strtoul(optarg, NULL, 0); break; case 'h': order = strtoul(optarg, NULL, 0); break; default: fprintf(stderr, "Usage: %s -a -h hash -n testsize\n", argv[0]); return 1; } read_names(); printf("Algorithm Time Ratio Max StdDev\n"); MEASURE(full_name_hash); MEASURE(djb2); MEASURE(string10); MEASURE(string_hash17); MEASURE(string_hash31); MEASURE(fnv32); return 0; }