From b8960f32b807ad2404d4f09256dc86e00a5c6fdb Mon Sep 17 00:00:00 2001 From: Sean Barrett Date: Sun, 3 Mar 2019 21:36:15 -0800 Subject: [PATCH] stb_ds: major string hash fix, minor other changes - arena and strdup string hashes were badly broken due to not setting up default slot correctly - tweak use of seed in 4-byte and 8-byte hash functions to hopefully be slightly stronger - a few internal #ifdefs for performance tuning --- stb_ds.h | 48 +++++++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/stb_ds.h b/stb_ds.h index 96c64d5..7f448ec 100644 --- a/stb_ds.h +++ b/stb_ds.h @@ -274,10 +274,10 @@ NOTES - HASH MAP * For compilers other than GCC and clang (e.g. Visual Studio), for hmput/hmget/hmdel and variants, the key must be an lvalue (so the macro can take the address of it). - For GCC and clang, extensions are used that eliminate this requirement if you're - using C99 and later or using C++. + Extensions are used that eliminate this requirement if you're using C99 and later + in GCC or clang, or if you're using C++ in GCC. - * To test for presence of a key in a hashmap, just do 'hmget(foo,key) >= 0'. + * To test for presence of a key in a hashmap, just do 'hmgeti(foo,key) >= 0'. * The iteration order of your data in the hashmap is determined solely by the order of insertions and deletions. In particular, if you never delete, new @@ -417,7 +417,7 @@ extern void * stbds_shmode_func(size_t elemsize, int mode); #if __clang__ #define STBDS_ADDRESSOF(typevar, value) ((__typeof__(typevar)[1]){value}) // literal array decays to pointer to value #else - #define STBDS_ADDRESSOF(typevar, value) ((typeof(typevar)[]){value}) // literal array decays to pointer to value + #define STBDS_ADDRESSOF(typevar, value) ((typeof(typevar)[1]){value}) // literal array decays to pointer to value #endif #else #define STBDS_ADDRESSOF(typevar, value) &(value) @@ -648,10 +648,15 @@ void *stbds_arrgrowf(void *a, size_t elemsize, size_t addlen, size_t min_cap) // stbds_hm hash table implementation // -#define STBDS_CACHE_LINE_SIZE 64 +#ifdef STBDS_INTERNAL_SMALL_BUCKET +#define STBDS_BUCKET_LENGTH 4 +#else #define STBDS_BUCKET_LENGTH 8 -#define STBDS_BUCKET_SHIFT 3 +#endif + +#define STBDS_BUCKET_SHIFT (STBDS_BUCKET_LENGTH == 8 ? 3 : 2) #define STBDS_BUCKET_MASK (STBDS_BUCKET_LENGTH-1) +#define STBDS_CACHE_LINE_SIZE 64 #define STBDS_ALIGN_FWD(n,a) (((n) + (a) - 1) & ~((a)-1)) @@ -698,13 +703,12 @@ void stbds_rand_seed(size_t seed) static size_t stbds_probe_position(size_t hash, size_t slot_count, size_t slot_log2) { - #if 1 - size_t pos = (hash >> (STBDS_SIZE_T_BITS-slot_log2)); - STBDS_ASSERT(pos < slot_count); - return pos; - #else - return hash & (slot_count-1); + size_t pos; + pos = hash & (slot_count-1); + #ifdef STBDS_INTERNAL_BUCKET_START + pos &= ~STBDS_BUCKET_MASK; #endif + return pos; } static size_t stbds_log2(size_t slot_count) @@ -812,7 +816,6 @@ static stbds_hash_index *stbds_make_hash_index(size_t slot_count, stbds_hash_ind for (;;) { size_t limit,z; stbds_hash_bucket *bucket; - pos &= (t->slot_count-1); bucket = &t->storage[pos >> STBDS_BUCKET_SHIFT]; STBDS_STATS(++stbds_rehash_probes); @@ -835,6 +838,7 @@ static stbds_hash_index *stbds_make_hash_index(size_t slot_count, stbds_hash_ind pos += step; // quadratic probing step += STBDS_BUCKET_LENGTH; + pos &= (t->slot_count-1); } } done: @@ -939,7 +943,7 @@ static size_t stbds_siphash_bytes(void *p, size_t len, size_t seed) #ifdef STBDS_SIPHASH_2_4 return v0^v1^v2^v3; #else - return v1^v2^v3; // slightly stronger since v0^v3 in above cancels out final round operation + return v1^v2^v3; // slightly stronger since v0^v3 in above cancels out final round operation? I tweeted at the authors of SipHash about this but they didn't reply #endif } @@ -954,10 +958,11 @@ size_t stbds_hash_bytes(void *p, size_t len, size_t seed) unsigned int hash = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24); #if 0 // HASH32-A Bob Jenkin's hash function w/o large constants - hash ^= seed ^ len; + hash ^= seed; hash -= (hash<<6); hash ^= (hash>>17); hash -= (hash<<9); + hash ^= seed; hash ^= (hash<<4); hash -= (hash<<3); hash ^= (hash<<10); @@ -966,22 +971,24 @@ size_t stbds_hash_bytes(void *p, size_t len, size_t seed) // HASH32-BB Bob Jenkin's presumably-accidental version of Thomas Wang hash with rotates turned into shifts. // Note that converting these back to rotates makes it run a lot slower, presumably due to collisions, so I'm // not really sure what's going on. - hash ^= seed ^ len; + hash ^= seed; hash = (hash ^ 61) ^ (hash >> 16); hash = hash + (hash << 3); hash = hash ^ (hash >> 4); hash = hash * 0x27d4eb2d; + hash ^= seed; hash = hash ^ (hash >> 15); #else // HASH32-C - Murmur3 + hash ^= seed; hash *= 0xcc9e2d51; hash = (hash << 17) | (hash >> 15); hash *= 0x1b873593; hash ^= seed; hash = (hash << 19) | (hash >> 13); hash = hash*5 + 0xe6546b64; - hash ^= len; hash ^= hash >> 16; hash *= 0x85ebca6b; + hash ^= seed; hash ^= hash >> 13; hash *= 0xc2b2ae35; hash ^= hash >> 16; @@ -1006,16 +1013,17 @@ size_t stbds_hash_bytes(void *p, size_t len, size_t seed) } else if (len == 8 && sizeof(size_t) == 8) { size_t hash = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24); hash |= (size_t) (d[4] | (d[5] << 8) | (d[6] << 16) | (d[7] << 24)) << 16 << 16; // avoid warning if size_t == 4 - hash ^= seed ^ len; + hash ^= seed; hash = (~hash) + (hash << 21); hash ^= STBDS_ROTATE_RIGHT(hash,24); hash *= 265; hash ^= STBDS_ROTATE_RIGHT(hash,14); + hash ^= seed; hash *= 21; hash ^= STBDS_ROTATE_RIGHT(hash,28); hash += (hash << 31); hash = (~hash) + (hash << 18); - return hash^seed; + return hash; } else { return stbds_siphash_bytes(p,len,seed); } @@ -1272,6 +1280,8 @@ void * stbds_shmode_func(size_t elemsize, int mode) { void *a = stbds_arrgrowf(0, elemsize, 0, 1); stbds_hash_index *h; + memset(a, 0, elemsize); + stbds_header(a)->length = 1; stbds_header(a)->hash_table = h = (stbds_hash_index *) stbds_make_hash_index(STBDS_BUCKET_LENGTH, NULL); h->string.mode = mode; return STBDS_ARR_TO_HASH(a,elemsize);