diff --git a/Makefile b/Makefile index 81a33c4..8eb4843 100644 --- a/Makefile +++ b/Makefile @@ -1,14 +1,14 @@ all: unit bench unit : tests/unit.c include/xorfilter.h include/binaryfusefilter.h - cc -std=c99 -O3 -o unit tests/unit.c -lm -Iinclude -Wall -Wextra -Wshadow -Wcast-qual + cc -std=c99 -O3 -o unit tests/unit.c -lm -Iinclude -Wall -Wextra -Wshadow -Wcast-qual -Wconversion -Wsign-conversion ab : tests/a.c tests/b.c - cc -std=c99 -o c tests/a.c tests/b.c -lm -Iinclude -Wall -Wextra -Wshadow -Wcast-qual + cc -std=c99 -o c tests/a.c tests/b.c -lm -Iinclude -Wall -Wextra -Wshadow -Wcast-qual -Wconversion -Wsign-conversion bench : benchmarks/bench.c include/xorfilter.h include/binaryfusefilter.h - cc -std=c99 -O3 -o bench benchmarks/bench.c -lm -Iinclude -Wall -Wextra -Wshadow -Wcast-qual + cc -std=c99 -O3 -o bench benchmarks/bench.c -lm -Iinclude -Wall -Wextra -Wshadow -Wcast-qual -Wconversion -Wsign-conversion test: unit ab ./unit diff --git a/benchmarks/bench.c b/benchmarks/bench.c index 93734d7..653d42a 100644 --- a/benchmarks/bench.c +++ b/benchmarks/bench.c @@ -9,19 +9,19 @@ bool testxor8(size_t size) { xor8_t filter; - xor8_allocate(size, &filter); + xor8_allocate((uint32_t)size, &filter); // we need some set of values uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); for (size_t i = 0; i < size; i++) { big_set[i] = i; // we use contiguous values } // we construct the filter - bool constructed = xor8_populate(big_set, size, &filter); // warm the cache + bool constructed = xor8_populate(big_set, (uint32_t)size, &filter); // warm the cache if(!constructed) { return false; } for (size_t times = 0; times < 5; times++) { clock_t t; t = clock(); - xor8_populate(big_set, size, &filter); + xor8_populate(big_set, (uint32_t)size, &filter); t = clock() - t; double time_taken = ((double)t) / CLOCKS_PER_SEC; // in seconds printf("It took %f seconds to build an index over %zu values. \n", @@ -37,19 +37,19 @@ bool testbufferedxor8(size_t size) { printf("size = %zu \n", size); xor8_t filter; - xor8_allocate(size, &filter); + xor8_allocate((uint32_t)size, &filter); // we need some set of values uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); for (size_t i = 0; i < size; i++) { big_set[i] = i; // we use contiguous values } // we construct the filter - bool constructed = xor8_buffered_populate(big_set, size, &filter); // warm the cache + bool constructed = xor8_buffered_populate(big_set, (uint32_t)size, &filter); // warm the cache if(!constructed) { return false; } for (size_t times = 0; times < 5; times++) { clock_t t; t = clock(); - xor8_buffered_populate(big_set, size, &filter); + xor8_buffered_populate(big_set, (uint32_t)size, &filter); t = clock() - t; double time_taken = ((double)t) / CLOCKS_PER_SEC; // in seconds printf("It took %f seconds to build an index over %zu values. 
\n", @@ -65,19 +65,19 @@ bool testxor16(size_t size) { printf("size = %zu \n", size); xor16_t filter; - xor16_allocate(size, &filter); + xor16_allocate((uint32_t)size, &filter); // we need some set of values uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); for (size_t i = 0; i < size; i++) { big_set[i] = i; // we use contiguous values } // we construct the filter - bool constructed = xor16_populate(big_set, size, &filter); // warm the cache + bool constructed = xor16_populate(big_set, (uint32_t)size, &filter); // warm the cache if(!constructed) { return false; } for (size_t times = 0; times < 5; times++) { clock_t t; t = clock(); - xor16_populate(big_set, size, &filter); + xor16_populate(big_set, (uint32_t)size, &filter); t = clock() - t; double time_taken = ((double)t) / CLOCKS_PER_SEC; // in seconds printf("It took %f seconds to build an index over %zu values. \n", @@ -93,19 +93,19 @@ bool testbufferedxor16(size_t size) { printf("size = %zu \n", size); xor16_t filter; - xor16_allocate(size, &filter); + xor16_allocate((uint32_t)size, &filter); // we need some set of values uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); for (size_t i = 0; i < size; i++) { big_set[i] = i; // we use contiguous values } // we construct the filter - bool constructed = xor16_buffered_populate(big_set, size, &filter); // warm the cache + bool constructed = xor16_buffered_populate(big_set, (uint32_t)size, &filter); // warm the cache if(!constructed) { return false; } for (size_t times = 0; times < 5; times++) { clock_t t; t = clock(); - xor16_buffered_populate(big_set, size, &filter); + xor16_buffered_populate(big_set, (uint32_t)size, &filter); t = clock() - t; double time_taken = ((double)t) / CLOCKS_PER_SEC; // in seconds printf("It took %f seconds to build an index over %zu values. \n", @@ -122,19 +122,19 @@ bool testbinaryfuse8(size_t size) { binary_fuse8_t filter; - binary_fuse8_allocate(size, &filter); + binary_fuse8_allocate((uint32_t)size, &filter); // we need some set of values uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); for (size_t i = 0; i < size; i++) { big_set[i] = i; // we use contiguous values } // we construct the filter - bool constructed = binary_fuse8_populate(big_set, size, &filter); // warm the cache + bool constructed = binary_fuse8_populate(big_set, (uint32_t)size, &filter); // warm the cache if(!constructed) { return false; } for (size_t times = 0; times < 5; times++) { clock_t t; t = clock(); - binary_fuse8_populate(big_set, size, &filter); + binary_fuse8_populate(big_set, (uint32_t)size, &filter); t = clock() - t; double time_taken = ((double)t) / CLOCKS_PER_SEC; // in seconds printf("It took %f seconds to build an index over %zu values. 
\n", @@ -151,19 +151,19 @@ bool testbinaryfuse16(size_t size) { binary_fuse16_t filter; - binary_fuse16_allocate(size, &filter); + binary_fuse16_allocate((uint32_t)size, &filter); // we need some set of values uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); for (size_t i = 0; i < size; i++) { big_set[i] = i; // we use contiguous values } // we construct the filter - bool constructed = binary_fuse16_populate(big_set, size, &filter); // warm the cache + bool constructed = binary_fuse16_populate(big_set, (uint32_t)size, &filter); // warm the cache if(!constructed) { return false; } for (size_t times = 0; times < 5; times++) { clock_t t; t = clock(); - binary_fuse16_populate(big_set, size, &filter); + binary_fuse16_populate(big_set, (uint32_t)size, &filter); t = clock() - t; double time_taken = ((double)t) / CLOCKS_PER_SEC; // in seconds printf("It took %f seconds to build an index over %zu values. \n", diff --git a/include/binaryfusefilter.h b/include/binaryfusefilter.h index c9ea43a..7049f3f 100644 --- a/include/binaryfusefilter.h +++ b/include/binaryfusefilter.h @@ -8,13 +8,12 @@ #include #include #ifndef XOR_MAX_ITERATIONS -#define XOR_MAX_ITERATIONS \ - 100 // probability of success should always be > 0.5 so 100 iterations is - // highly unlikely +// probability of success should always be > 0.5 so 100 iterations is highly unlikely +#define XOR_MAX_ITERATIONS 100 #endif static int binary_fuse_cmpfunc(const void * a, const void * b) { - return ( *(const uint64_t*)a - *(const uint64_t*)b ); + return (int)( *(const uint64_t*)a - *(const uint64_t*)b ); } static size_t binary_fuse_sort_and_remove_dup(uint64_t* keys, size_t length) { @@ -33,25 +32,25 @@ static size_t binary_fuse_sort_and_remove_dup(uint64_t* keys, size_t length) { * We start with a few utilities. 
***/ static inline uint64_t binary_fuse_murmur64(uint64_t h) { - h ^= h >> 33; + h ^= h >> 33U; h *= UINT64_C(0xff51afd7ed558ccd); - h ^= h >> 33; + h ^= h >> 33U; h *= UINT64_C(0xc4ceb9fe1a85ec53); - h ^= h >> 33; + h ^= h >> 33U; return h; } static inline uint64_t binary_fuse_mix_split(uint64_t key, uint64_t seed) { return binary_fuse_murmur64(key + seed); } static inline uint64_t binary_fuse_rotl64(uint64_t n, unsigned int c) { - return (n << (c & 63)) | (n >> ((-c) & 63)); + return (n << (c & 63U)) | (n >> ((-c) & 63U)); } static inline uint32_t binary_fuse_reduce(uint32_t hash, uint32_t n) { // http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ - return (uint32_t)(((uint64_t)hash * n) >> 32); + return (uint32_t)(((uint64_t)hash * n) >> 32U); } -static inline uint64_t binary_fuse8_fingerprint(uint64_t hash) { - return hash ^ (hash >> 32); +static inline uint8_t binary_fuse8_fingerprint(uint64_t hash) { + return (uint8_t)(hash ^ (hash >> 32U)); } /** @@ -61,9 +60,9 @@ static inline uint64_t binary_fuse8_fingerprint(uint64_t hash) { // returns random number, modifies the seed static inline uint64_t binary_fuse_rng_splitmix64(uint64_t *seed) { uint64_t z = (*seed += UINT64_C(0x9E3779B97F4A7C15)); - z = (z ^ (z >> 30)) * UINT64_C(0xBF58476D1CE4E5B9); - z = (z ^ (z >> 27)) * UINT64_C(0x94D049BB133111EB); - return z ^ (z >> 31); + z = (z ^ (z >> 30U)) * UINT64_C(0xBF58476D1CE4E5B9); + z = (z ^ (z >> 27U)) * UINT64_C(0x94D049BB133111EB); + return z ^ (z >> 31U); } typedef struct binary_fuse8_s { @@ -80,7 +79,7 @@ typedef struct binary_fuse8_s { // https://stackoverflow.com/a/50958815 #ifdef __SIZEOF_INT128__ // compilers supporting __uint128, e.g., gcc, clang static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) { - return ((__uint128_t)a * b) >> 64; + return (uint64_t)(((__uint128_t)a * b) >> 64U); } #elif defined(_M_X64) || defined(_MARM64) // MSVC static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) { @@ -158,20 +157,20 @@ static inline binary_hashes_t binary_fuse8_hash_batch(uint64_t hash, ans.h0 = (uint32_t)hi; ans.h1 = ans.h0 + filter->SegmentLength; ans.h2 = ans.h1 + filter->SegmentLength; - ans.h1 ^= (uint32_t)(hash >> 18) & filter->SegmentLengthMask; + ans.h1 ^= (uint32_t)(hash >> 18U) & filter->SegmentLengthMask; ans.h2 ^= (uint32_t)(hash)&filter->SegmentLengthMask; return ans; } -static inline uint32_t binary_fuse8_hash(int index, uint64_t hash, +static inline uint32_t binary_fuse8_hash(uint64_t index, uint64_t hash, const binary_fuse8_t *filter) { uint64_t h = binary_fuse_mulhi(hash, filter->SegmentCountLength); h += index * filter->SegmentLength; // keep the lower 36 bits - uint64_t hh = hash & ((1ULL << 36) - 1); + uint64_t hh = hash & ((1ULL << 36U) - 1); // index 0: right shift by 36; index 1: right shift by 18; index 2: no shift h ^= (size_t)((hh >> (36 - 18 * index)) & filter->SegmentLengthMask); - return h; + return (uint32_t)h; } // Report if the key is in the set, with false positive rate. 
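
(Editor's aside, not part of the patch.) The hunk above keeps the multiply-shift range reduction from the lemire.me link: binary_fuse_reduce (and xor_reduce in xorfilter.h) maps a 32-bit hash into [0, n) with one widening multiply and a shift instead of a modulo, which is also why the patch adds the explicit (uint32_t) narrowing and the 32U shift count. A minimal standalone sketch of that trick; the function name reduce and the sample values are made up for illustration:

#include <stdint.h>
#include <stdio.h>

/* Sketch of the multiply-shift reduction used by binary_fuse_reduce and
 * xor_reduce: the high 32 bits of (uint64_t)hash * n are always in [0, n),
 * so the shift replaces hash % n. */
static uint32_t reduce(uint32_t hash, uint32_t n) {
  return (uint32_t)(((uint64_t)hash * n) >> 32U);
}

int main(void) {
  const uint32_t n = 1000;
  const uint32_t hashes[] = {0u, 1u, 0x40000000u, 0x80000000u, 0xFFFFFFFFu};
  for (int i = 0; i < 5; i++) {
    /* every result stays below n, no division involved */
    printf("reduce(0x%08X, %u) = %u\n", (unsigned)hashes[i], (unsigned)n,
           (unsigned)reduce(hashes[i], n));
  }
  return 0;
}
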
@@ -180,7 +179,8 @@ static inline bool binary_fuse8_contain(uint64_t key, uint64_t hash = binary_fuse_mix_split(key, filter->Seed); uint8_t f = binary_fuse8_fingerprint(hash); binary_hashes_t hashes = binary_fuse8_hash_batch(hash, filter); - f ^= filter->Fingerprints[hashes.h0] ^ filter->Fingerprints[hashes.h1] ^ + f ^= (uint32_t)filter->Fingerprints[hashes.h0] ^ + filter->Fingerprints[hashes.h1] ^ filter->Fingerprints[hashes.h2]; return f == 0; } @@ -190,12 +190,12 @@ static inline uint32_t binary_fuse_calculate_segment_length(uint32_t arity, // These parameters are very sensitive. Replacing 'floor' by 'round' can // substantially affect the construction time. if (arity == 3) { - return ((uint32_t)1) << (int)(floor(log((double)(size)) / log(3.33) + 2.25)); - } else if (arity == 4) { - return ((uint32_t)1) << (int)(floor(log((double)(size)) / log(2.91) - 0.5)); - } else { - return 65536; + return ((uint32_t)1) << (unsigned)(floor(log((double)(size)) / log(3.33) + 2.25)); + } + if (arity == 4) { + return ((uint32_t)1) << (unsigned)(floor(log((double)(size)) / log(2.91) - 0.5)); } + return 65536; } static inline double binary_fuse_max(double a, double b) { @@ -209,11 +209,11 @@ static inline double binary_fuse_calculate_size_factor(uint32_t arity, uint32_t size) { if (arity == 3) { return binary_fuse_max(1.125, 0.875 + 0.25 * log(1000000.0) / log((double)size)); - } else if (arity == 4) { + } + if (arity == 4) { return binary_fuse_max(1.075, 0.77 + 0.305 * log(600000.0) / log((double)size)); - } else { - return 2.0; } + return 2.0; } // allocate enough capacity for a set containing up to 'size' elements @@ -290,7 +290,7 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, blockBits += 1; } uint32_t block = ((uint32_t)1 << blockBits); - uint32_t *startPos = (uint32_t *)malloc((1 << blockBits) * sizeof(uint32_t)); + uint32_t *startPos = (uint32_t *)malloc((1U << blockBits) * sizeof(uint32_t)); uint32_t h012[5]; if ((alone == NULL) || (t2count == NULL) || (reverseH == NULL) || @@ -308,7 +308,7 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, if (loop + 1 > XOR_MAX_ITERATIONS) { // The probability of this happening is lower than the // the cosmic-ray probability (i.e., a cosmic ray corrupts your system) - memset(filter->Fingerprints, ~0, filter->ArrayLength); + memset(filter->Fingerprints, 0xFF, filter->ArrayLength); free(alone); free(t2count); free(reverseH); @@ -321,7 +321,7 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, for (uint32_t i = 0; i < block; i++) { // important : i * size would overflow as a 32-bit number in some // cases. 
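
(Editor's aside, not part of the patch.) The comment just above is the reason the next changed line widens to uint64_t before multiplying; the new (uint32_t) cast is only meant to silence -Wconversion on the assignment. Note that the binary_fuse8 hunk below appears to narrow before the >> blockBits shift while the matching binary_fuse16 hunk later narrows after it; only the latter keeps the full 64-bit product that the comment asks for. A standalone sketch of the wrap-around, with made-up values:

#include <stdint.h>
#include <stdio.h>

/* Sketch of the overflow the comment above warns about: multiplying two
 * uint32_t values wraps modulo 2^32, so the product must be widened to
 * uint64_t before shifting, and narrowed only afterwards. */
int main(void) {
  const uint32_t i = 70000, size = 70000;          /* made-up inputs */
  uint32_t wrapped = i * size;                     /* 4.9e9 wraps mod 2^32 */
  uint64_t exact   = (uint64_t)i * size;           /* exact 64-bit product */
  printf("32-bit product: %u\n", (unsigned)wrapped);
  printf("64-bit product: %llu\n", (unsigned long long)exact);
  return 0;
}
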
- startPos[i] = ((uint64_t)i * size) >> blockBits; + startPos[i] = (uint32_t)((uint64_t)i * size) >> blockBits; } uint64_t maskblock = block - 1; @@ -344,12 +344,12 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, t2hash[h0] ^= hash; uint32_t h1= binary_fuse8_hash(1, hash, filter); t2count[h1] += 4; - t2count[h1] ^= 1; + t2count[h1] ^= 1U; t2hash[h1] ^= hash; uint32_t h2 = binary_fuse8_hash(2, hash, filter); t2count[h2] += 4; t2hash[h2] ^= hash; - t2count[h2] ^= 2; + t2count[h2] ^= 2U; if ((t2hash[h0] & t2hash[h1] & t2hash[h2]) == 0) { if (((t2hash[h0] == 0) && (t2count[h0] == 8)) || ((t2hash[h1] == 0) && (t2count[h1] == 8)) @@ -358,10 +358,10 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, t2count[h0] -= 4; t2hash[h0] ^= hash; t2count[h1] -= 4; - t2count[h1] ^= 1; + t2count[h1] ^= 1U; t2hash[h1] ^= hash; t2count[h2] -= 4; - t2count[h2] ^= 2; + t2count[h2] ^= 2U; t2hash[h2] ^= hash; } } @@ -382,13 +382,13 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, // Add sets with one key to the queue. for (uint32_t i = 0; i < capacity; i++) { alone[Qsize] = i; - Qsize += ((t2count[i] >> 2) == 1) ? 1 : 0; + Qsize += ((t2count[i] >> 2U) == 1) ? 1U : 0U; } uint32_t stacksize = 0; while (Qsize > 0) { Qsize--; uint32_t index = alone[Qsize]; - if ((t2count[index] >> 2) == 1) { + if ((t2count[index] >> 2U) == 1) { uint64_t hash = t2hash[index]; //h012[0] = binary_fuse8_hash(0, hash, filter); @@ -396,13 +396,13 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, h012[2] = binary_fuse8_hash(2, hash, filter); h012[3] = binary_fuse8_hash(0, hash, filter); // == h012[0]; h012[4] = h012[1]; - uint8_t found = t2count[index] & 3; + uint8_t found = t2count[index] & 3U; reverseH[stacksize] = found; reverseOrder[stacksize] = hash; stacksize++; uint32_t other_index1 = h012[found + 1]; alone[Qsize] = other_index1; - Qsize += ((t2count[other_index1] >> 2) == 2 ? 1 : 0); + Qsize += ((t2count[other_index1] >> 2U) == 2 ? 1U : 0U); t2count[other_index1] -= 4; t2count[other_index1] ^= binary_fuse_mod3(found + 1); @@ -410,7 +410,7 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, uint32_t other_index2 = h012[found + 2]; alone[Qsize] = other_index2; - Qsize += ((t2count[other_index2] >> 2) == 2 ? 1 : 0); + Qsize += ((t2count[other_index2] >> 2U) == 2 ? 
1U : 0U); t2count[other_index2] -= 4; t2count[other_index2] ^= binary_fuse_mod3(found + 2); t2hash[other_index2] ^= hash; @@ -420,8 +420,9 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, // success size = stacksize; break; - } else if(duplicates > 0) { - size = binary_fuse_sort_and_remove_dup(keys, size); + } + if(duplicates > 0) { + size = (uint32_t)binary_fuse_sort_and_remove_dup(keys, size); } memset(reverseOrder, 0, sizeof(uint64_t) * size); memset(t2count, 0, sizeof(uint8_t) * capacity); @@ -439,9 +440,9 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, h012[2] = binary_fuse8_hash(2, hash, filter); h012[3] = h012[0]; h012[4] = h012[1]; - filter->Fingerprints[h012[found]] = xor2 ^ - filter->Fingerprints[h012[found + 1]] ^ - filter->Fingerprints[h012[found + 2]]; + filter->Fingerprints[h012[found]] = (uint8_t)((uint32_t)xor2 ^ + filter->Fingerprints[h012[found + 1]] ^ + filter->Fingerprints[h012[found + 2]]); } free(alone); free(t2count); @@ -466,8 +467,8 @@ typedef struct binary_fuse16_s { uint16_t *Fingerprints; } binary_fuse16_t; -static inline uint64_t binary_fuse16_fingerprint(uint64_t hash) { - return hash ^ (hash >> 32); +static inline uint16_t binary_fuse16_fingerprint(uint64_t hash) { + return (uint16_t)(hash ^ (hash >> 32U)); } static inline binary_hashes_t binary_fuse16_hash_batch(uint64_t hash, @@ -477,19 +478,19 @@ static inline binary_hashes_t binary_fuse16_hash_batch(uint64_t hash, ans.h0 = (uint32_t)hi; ans.h1 = ans.h0 + filter->SegmentLength; ans.h2 = ans.h1 + filter->SegmentLength; - ans.h1 ^= (uint32_t)(hash >> 18) & filter->SegmentLengthMask; + ans.h1 ^= (uint32_t)(hash >> 18U) & filter->SegmentLengthMask; ans.h2 ^= (uint32_t)(hash)&filter->SegmentLengthMask; return ans; } -static inline uint32_t binary_fuse16_hash(int index, uint64_t hash, +static inline uint32_t binary_fuse16_hash(uint64_t index, uint64_t hash, const binary_fuse16_t *filter) { uint64_t h = binary_fuse_mulhi(hash, filter->SegmentCountLength); h += index * filter->SegmentLength; // keep the lower 36 bits - uint64_t hh = hash & ((1ULL << 36) - 1); + uint64_t hh = hash & ((1ULL << 36U) - 1); // index 0: right shift by 36; index 1: right shift by 18; index 2: no shift h ^= (size_t)((hh >> (36 - 18 * index)) & filter->SegmentLengthMask); - return h; + return (uint32_t)h; } // Report if the key is in the set, with false positive rate. @@ -498,7 +499,8 @@ static inline bool binary_fuse16_contain(uint64_t key, uint64_t hash = binary_fuse_mix_split(key, filter->Seed); uint16_t f = binary_fuse16_fingerprint(hash); binary_hashes_t hashes = binary_fuse16_hash_batch(hash, filter); - f ^= filter->Fingerprints[hashes.h0] ^ filter->Fingerprints[hashes.h1] ^ + f ^= (uint32_t)filter->Fingerprints[hashes.h0] ^ + filter->Fingerprints[hashes.h1] ^ filter->Fingerprints[hashes.h2]; return f == 0; } @@ -575,7 +577,7 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, blockBits += 1; } uint32_t block = ((uint32_t)1 << blockBits); - uint32_t *startPos = (uint32_t *)malloc((1 << blockBits) * sizeof(uint32_t)); + uint32_t *startPos = (uint32_t *)malloc((1U << blockBits) * sizeof(uint32_t)); uint32_t h012[5]; if ((alone == NULL) || (t2count == NULL) || (reverseH == NULL) || @@ -605,7 +607,7 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, for (uint32_t i = 0; i < block; i++) { // important : i * size would overflow as a 32-bit number in some // cases. 
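
(Editor's aside, not part of the patch.) A note on the t2count hunks on either side of this point, where the patch switches the XOR and shift constants to unsigned (1U, 2U, >> 2U, & 3U): each t2count byte packs two things. Bits 2 and up hold how many keys map to the cell (hence += 4, -= 4 and the >> 2 tests), and bits 0-1 hold the XOR of the hash-slot indices (0, 1 or 2) under which those keys reached the cell (hence ^= 1, ^= 2 and & 3). Once a single key remains, the low bits identify which of its three hashes points here. A standalone sketch with made-up keys:

#include <stdint.h>
#include <stdio.h>

/* Sketch of the packed counter used by the populate routines:
 * key count in bits 2+, XOR of hash-slot indices in bits 0-1. */
int main(void) {
  uint8_t t2count = 0;

  t2count += 4; t2count ^= 1;   /* key A lands here via its hash #1 */
  t2count += 4; t2count ^= 2;   /* key B lands here via its hash #2 */
  printf("count = %u, slot bits = %u\n",
         (unsigned)(t2count >> 2), (unsigned)(t2count & 3));

  t2count -= 4; t2count ^= 2;   /* peel key B off again */
  printf("count = %u, remaining key uses hash #%u\n",
         (unsigned)(t2count >> 2), (unsigned)(t2count & 3));
  return 0;
}
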
- startPos[i] = ((uint64_t)i * size) >> blockBits; + startPos[i] = (uint32_t)(((uint64_t)i * size) >> blockBits); } uint64_t maskblock = block - 1; @@ -628,12 +630,12 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, t2hash[h0] ^= hash; uint32_t h1= binary_fuse16_hash(1, hash, filter); t2count[h1] += 4; - t2count[h1] ^= 1; + t2count[h1] ^= 1U; t2hash[h1] ^= hash; uint32_t h2 = binary_fuse16_hash(2, hash, filter); t2count[h2] += 4; t2hash[h2] ^= hash; - t2count[h2] ^= 2; + t2count[h2] ^= 2U; if ((t2hash[h0] & t2hash[h1] & t2hash[h2]) == 0) { if (((t2hash[h0] == 0) && (t2count[h0] == 8)) || ((t2hash[h1] == 0) && (t2count[h1] == 8)) @@ -642,10 +644,10 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, t2count[h0] -= 4; t2hash[h0] ^= hash; t2count[h1] -= 4; - t2count[h1] ^= 1; + t2count[h1] ^= 1U; t2hash[h1] ^= hash; t2count[h2] -= 4; - t2count[h2] ^= 2; + t2count[h2] ^= 2U; t2hash[h2] ^= hash; } } @@ -666,13 +668,13 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, // Add sets with one key to the queue. for (uint32_t i = 0; i < capacity; i++) { alone[Qsize] = i; - Qsize += ((t2count[i] >> 2) == 1) ? 1 : 0; + Qsize += ((t2count[i] >> 2U) == 1) ? 1U : 0U; } uint32_t stacksize = 0; while (Qsize > 0) { Qsize--; uint32_t index = alone[Qsize]; - if ((t2count[index] >> 2) == 1) { + if ((t2count[index] >> 2U) == 1) { uint64_t hash = t2hash[index]; //h012[0] = binary_fuse16_hash(0, hash, filter); @@ -680,13 +682,13 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, h012[2] = binary_fuse16_hash(2, hash, filter); h012[3] = binary_fuse16_hash(0, hash, filter); // == h012[0]; h012[4] = h012[1]; - uint8_t found = t2count[index] & 3; + uint8_t found = t2count[index] & 3U; reverseH[stacksize] = found; reverseOrder[stacksize] = hash; stacksize++; uint32_t other_index1 = h012[found + 1]; alone[Qsize] = other_index1; - Qsize += ((t2count[other_index1] >> 2) == 2 ? 1 : 0); + Qsize += ((t2count[other_index1] >> 2U) == 2 ? 1U : 0U); t2count[other_index1] -= 4; t2count[other_index1] ^= binary_fuse_mod3(found + 1); @@ -694,7 +696,7 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, uint32_t other_index2 = h012[found + 2]; alone[Qsize] = other_index2; - Qsize += ((t2count[other_index2] >> 2) == 2 ? 1 : 0); + Qsize += ((t2count[other_index2] >> 2U) == 2 ? 
1U : 0U); t2count[other_index2] -= 4; t2count[other_index2] ^= binary_fuse_mod3(found + 2); t2hash[other_index2] ^= hash; @@ -704,8 +706,9 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, // success size = stacksize; break; - } else if(duplicates > 0) { - size = binary_fuse_sort_and_remove_dup(keys, size); + } + if(duplicates > 0) { + size = (uint32_t)binary_fuse_sort_and_remove_dup(keys, size); } memset(reverseOrder, 0, sizeof(uint64_t) * size); memset(t2count, 0, sizeof(uint8_t) * capacity); @@ -723,9 +726,10 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, h012[2] = binary_fuse16_hash(2, hash, filter); h012[3] = h012[0]; h012[4] = h012[1]; - filter->Fingerprints[h012[found]] = xor2 ^ - filter->Fingerprints[h012[found + 1]] ^ - filter->Fingerprints[h012[found + 2]]; + filter->Fingerprints[h012[found]] = (uint16_t)( + (uint32_t)xor2 ^ + (uint32_t)filter->Fingerprints[h012[found + 1]] ^ + (uint32_t)filter->Fingerprints[h012[found + 2]]); } free(alone); free(t2count); diff --git a/include/xorfilter.h b/include/xorfilter.h index c86fa80..49de6d9 100644 --- a/include/xorfilter.h +++ b/include/xorfilter.h @@ -12,12 +12,13 @@ #endif #ifndef XOR_MAX_ITERATIONS -#define XOR_MAX_ITERATIONS 100 // probabillity of success should always be > 0.5 so 100 iterations is highly unlikely +// probabillity of success should always be > 0.5 so 100 iterations is highly unlikely +#define XOR_MAX_ITERATIONS 100 #endif static int xor_cmpfunc(const void * a, const void * b) { - return ( *(const uint64_t*)a - *(const uint64_t*)b ); + return (int)( *(const uint64_t*)a - *(const uint64_t*)b ); } static size_t xor_sort_and_remove_dup(uint64_t* keys, size_t length) { @@ -43,11 +44,11 @@ static size_t xor_sort_and_remove_dup(uint64_t* keys, size_t length) { * We start with a few utilities. ***/ static inline uint64_t xor_murmur64(uint64_t h) { - h ^= h >> 33; + h ^= h >> 33U; h *= UINT64_C(0xff51afd7ed558ccd); - h ^= h >> 33; + h ^= h >> 33U; h *= UINT64_C(0xc4ceb9fe1a85ec53); - h ^= h >> 33; + h ^= h >> 33U; return h; } @@ -56,16 +57,16 @@ static inline uint64_t xor_mix_split(uint64_t key, uint64_t seed) { } static inline uint64_t xor_rotl64(uint64_t n, unsigned int c) { - return (n << (c & 63)) | (n >> ((-c) & 63)); + return (n << (c & 63U)) | (n >> ((-c) & 63U)); } static inline uint32_t xor_reduce(uint32_t hash, uint32_t n) { // http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ - return (uint32_t)(((uint64_t)hash * n) >> 32); + return (uint32_t)(((uint64_t)hash * n) >> 32U); } static inline uint64_t xor_fingerprint(uint64_t hash) { - return hash ^ (hash >> 32); + return hash ^ (hash >> 32U); } /** @@ -75,9 +76,9 @@ static inline uint64_t xor_fingerprint(uint64_t hash) { // returns random number, modifies the seed static inline uint64_t xor_rng_splitmix64(uint64_t *seed) { uint64_t z = (*seed += UINT64_C(0x9E3779B97F4A7C15)); - z = (z ^ (z >> 30)) * UINT64_C(0xBF58476D1CE4E5B9); - z = (z ^ (z >> 27)) * UINT64_C(0x94D049BB133111EB); - return z ^ (z >> 31); + z = (z ^ (z >> 30U)) * UINT64_C(0xBF58476D1CE4E5B9); + z = (z ^ (z >> 27U)) * UINT64_C(0x94D049BB133111EB); + return z ^ (z >> 31U); } /** @@ -94,15 +95,16 @@ typedef struct xor8_s { // Report if the key is in the set, with false positive rate. 
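
(Editor's aside, not part of the patch.) The xor8_contain hunk that follows, like binary_fuse8_contain earlier, checks membership by recomputing the key's fingerprint and XOR-ing the three table entries the key hashes to; construction solved for one of those entries so that the equation holds for every inserted key. A minimal standalone sketch of that invariant, with made-up fingerprint values:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Sketch of the xor-filter invariant: populate fills one free slot so that
 * fingerprint(key) == fp[h0] ^ fp[h1] ^ fp[h2]; contain re-evaluates it. */
int main(void) {
  uint8_t fp[3] = {0xAB, 0x3C, 0x00};   /* pretend fp[0] and fp[1] are set */
  const uint8_t key_fingerprint = 0x5D; /* made-up 8-bit fingerprint */

  /* populate step: solve for the remaining slot */
  fp[2] = (uint8_t)(key_fingerprint ^ fp[0] ^ fp[1]);

  /* contain step: recompute the XOR and compare */
  uint8_t check = (uint8_t)(fp[0] ^ fp[1] ^ fp[2]);
  assert(check == key_fingerprint);
  printf("membership check: 0x%02X == 0x%02X\n",
         (unsigned)check, (unsigned)key_fingerprint);
  return 0;
}

A key that was never inserted hits three unrelated entries, so for an 8-bit fingerprint the equation holds only with probability around 2^-8, which is the false-positive rate the test programs estimate.
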
static inline bool xor8_contain(uint64_t key, const xor8_t *filter) { uint64_t hash = xor_mix_split(key, filter->seed); - uint8_t f = xor_fingerprint(hash); + uint8_t f = (uint8_t)xor_fingerprint(hash); uint32_t r0 = (uint32_t)hash; uint32_t r1 = (uint32_t)xor_rotl64(hash, 21); uint32_t r2 = (uint32_t)xor_rotl64(hash, 42); - uint32_t h0 = xor_reduce(r0, filter->blockLength); - uint32_t h1 = xor_reduce(r1, filter->blockLength) + filter->blockLength; - uint32_t h2 = xor_reduce(r2, filter->blockLength) + 2 * filter->blockLength; - return f == (filter->fingerprints[h0] ^ filter->fingerprints[h1] ^ - filter->fingerprints[h2]); + uint32_t h0 = xor_reduce(r0, (uint32_t)filter->blockLength); + uint32_t h1 = xor_reduce(r1, (uint32_t)filter->blockLength) + (uint32_t)filter->blockLength; + uint32_t h2 = xor_reduce(r2, (uint32_t)filter->blockLength) + 2 * (uint32_t)filter->blockLength; + return f == ((uint32_t)filter->fingerprints[h0] ^ + filter->fingerprints[h1] ^ + filter->fingerprints[h2]); } typedef struct xor16_s { @@ -115,43 +117,42 @@ typedef struct xor16_s { // Report if the key is in the set, with false positive rate. static inline bool xor16_contain(uint64_t key, const xor16_t *filter) { uint64_t hash = xor_mix_split(key, filter->seed); - uint16_t f = xor_fingerprint(hash); + uint16_t f = (uint16_t)xor_fingerprint(hash); uint32_t r0 = (uint32_t)hash; uint32_t r1 = (uint32_t)xor_rotl64(hash, 21); uint32_t r2 = (uint32_t)xor_rotl64(hash, 42); - uint32_t h0 = xor_reduce(r0, filter->blockLength); - uint32_t h1 = xor_reduce(r1, filter->blockLength) + filter->blockLength; - uint32_t h2 = xor_reduce(r2, filter->blockLength) + 2 * filter->blockLength; - return f == (filter->fingerprints[h0] ^ filter->fingerprints[h1] ^ - filter->fingerprints[h2]); + uint32_t h0 = xor_reduce(r0, (uint32_t)filter->blockLength); + uint32_t h1 = xor_reduce(r1, (uint32_t)filter->blockLength) + (uint32_t)filter->blockLength; + uint32_t h2 = xor_reduce(r2, (uint32_t)filter->blockLength) + 2 * (uint32_t)filter->blockLength; + return f == ((uint32_t)filter->fingerprints[h0] ^ + filter->fingerprints[h1] ^ + filter->fingerprints[h2]); } // allocate enough capacity for a set containing up to 'size' elements // caller is responsible to call xor8_free(filter) static inline bool xor8_allocate(uint32_t size, xor8_t *filter) { - size_t capacity = 32 + 1.23 * size; + size_t capacity = (size_t)(32 + 1.23 * size); capacity = capacity / 3 * 3; filter->fingerprints = (uint8_t *)malloc(capacity * sizeof(uint8_t)); if (filter->fingerprints != NULL) { filter->blockLength = capacity / 3; return true; - } else { - return false; } + return false; } // allocate enough capacity for a set containing up to 'size' elements // caller is responsible to call xor16_free(filter) static inline bool xor16_allocate(uint32_t size, xor16_t *filter) { - size_t capacity = 32 + 1.23 * size; + size_t capacity = (size_t)(32 + 1.23 * size); capacity = capacity / 3 * 3; filter->fingerprints = (uint16_t *)malloc(capacity * sizeof(uint16_t)); if (filter->fingerprints != NULL) { filter->blockLength = capacity / 3; return true; - } else { - return false; - } + } + return false; } // report memory usage @@ -202,9 +203,9 @@ static inline xor_hashes_t xor8_get_h0_h1_h2(uint64_t k, const xor8_t *filter) { uint32_t r1 = (uint32_t)xor_rotl64(hash, 21); uint32_t r2 = (uint32_t)xor_rotl64(hash, 42); - answer.h0 = xor_reduce(r0, filter->blockLength); - answer.h1 = xor_reduce(r1, filter->blockLength); - answer.h2 = xor_reduce(r2, filter->blockLength); + answer.h0 = 
xor_reduce(r0, (uint32_t)filter->blockLength); + answer.h1 = xor_reduce(r1, (uint32_t)filter->blockLength); + answer.h2 = xor_reduce(r2, (uint32_t)filter->blockLength); return answer; } @@ -218,27 +219,27 @@ typedef struct xor_h0h1h2_s xor_h0h1h2_t; static inline uint32_t xor8_get_h0(uint64_t hash, const xor8_t *filter) { uint32_t r0 = (uint32_t)hash; - return xor_reduce(r0, filter->blockLength); + return xor_reduce(r0, (uint32_t)filter->blockLength); } static inline uint32_t xor8_get_h1(uint64_t hash, const xor8_t *filter) { uint32_t r1 = (uint32_t)xor_rotl64(hash, 21); - return xor_reduce(r1, filter->blockLength); + return xor_reduce(r1, (uint32_t)filter->blockLength); } static inline uint32_t xor8_get_h2(uint64_t hash, const xor8_t *filter) { uint32_t r2 = (uint32_t)xor_rotl64(hash, 42); - return xor_reduce(r2, filter->blockLength); + return xor_reduce(r2, (uint32_t)filter->blockLength); } static inline uint32_t xor16_get_h0(uint64_t hash, const xor16_t *filter) { uint32_t r0 = (uint32_t)hash; - return xor_reduce(r0, filter->blockLength); + return xor_reduce(r0, (uint32_t)filter->blockLength); } static inline uint32_t xor16_get_h1(uint64_t hash, const xor16_t *filter) { uint32_t r1 = (uint32_t)xor_rotl64(hash, 21); - return xor_reduce(r1, filter->blockLength); + return xor_reduce(r1, (uint32_t)filter->blockLength); } static inline uint32_t xor16_get_h2(uint64_t hash, const xor16_t *filter) { uint32_t r2 = (uint32_t)xor_rotl64(hash, 42); - return xor_reduce(r2, filter->blockLength); + return xor_reduce(r2, (uint32_t)filter->blockLength); } static inline xor_hashes_t xor16_get_h0_h1_h2(uint64_t k, const xor16_t *filter) { @@ -249,9 +250,9 @@ static inline xor_hashes_t xor16_get_h0_h1_h2(uint64_t k, uint32_t r1 = (uint32_t)xor_rotl64(hash, 21); uint32_t r2 = (uint32_t)xor_rotl64(hash, 42); - answer.h0 = xor_reduce(r0, filter->blockLength); - answer.h1 = xor_reduce(r1, filter->blockLength); - answer.h2 = xor_reduce(r2, filter->blockLength); + answer.h0 = xor_reduce(r0, (uint32_t)filter->blockLength); + answer.h1 = xor_reduce(r1, (uint32_t)filter->blockLength); + answer.h2 = xor_reduce(r2, (uint32_t)filter->blockLength); return answer; } @@ -265,7 +266,7 @@ typedef struct xor_keyindex_s xor_keyindex_t; struct xor_setbuffer_s { xor_keyindex_t *buffer; uint32_t *counts; - int insignificantbits; + int insignificantbits; // should be an unsigned type to avoid a lot of casts uint32_t slotsize; // should be 1<< insignificantbits uint32_t slotcount; size_t originalsize; @@ -276,10 +277,10 @@ typedef struct xor_setbuffer_s xor_setbuffer_t; static inline bool xor_init_buffer(xor_setbuffer_t *buffer, size_t size) { buffer->originalsize = size; buffer->insignificantbits = 18; - buffer->slotsize = UINT32_C(1) << buffer->insignificantbits; - buffer->slotcount = (size + buffer->slotsize - 1) / buffer->slotsize; + buffer->slotsize = UINT32_C(1) << (uint32_t)buffer->insignificantbits; + buffer->slotcount = (uint32_t)(size + buffer->slotsize - 1) / buffer->slotsize; buffer->buffer = (xor_keyindex_t *)malloc( - buffer->slotcount * buffer->slotsize * sizeof(xor_keyindex_t)); + (size_t)buffer->slotcount * buffer->slotsize * sizeof(xor_keyindex_t)); buffer->counts = (uint32_t *)malloc(buffer->slotcount * sizeof(uint32_t)); if ((buffer->counts == NULL) || (buffer->buffer == NULL)) { free(buffer->counts); @@ -300,12 +301,12 @@ static inline void xor_free_buffer(xor_setbuffer_t *buffer) { static inline void xor_buffered_increment_counter(uint32_t index, uint64_t hash, xor_setbuffer_t *buffer, xor_xorset_t *sets) { 
- uint32_t slot = index >> buffer->insignificantbits; - size_t addr = buffer->counts[slot] + (slot << buffer->insignificantbits); + uint32_t slot = index >> (uint32_t)buffer->insignificantbits; + size_t addr = buffer->counts[slot] + (slot << (uint32_t)buffer->insignificantbits); buffer->buffer[addr].index = index; buffer->buffer[addr].hash = hash; buffer->counts[slot]++; - size_t offset = (slot << buffer->insignificantbits); + size_t offset = (slot << (uint32_t)buffer->insignificantbits); if (buffer->counts[slot] == buffer->slotsize) { // must empty the buffer for (size_t i = offset; i < buffer->slotsize + offset; i++) { @@ -321,10 +322,10 @@ static inline void xor_buffered_increment_counter(uint32_t index, uint64_t hash, static inline void xor_make_buffer_current(xor_setbuffer_t *buffer, xor_xorset_t *sets, uint32_t index, xor_keyindex_t *Q, size_t *Qsize) { - uint32_t slot = index >> buffer->insignificantbits; + uint32_t slot = index >> (uint32_t)buffer->insignificantbits; if(buffer->counts[slot] > 0) { // uncommon! size_t qsize = *Qsize; - size_t offset = (slot << buffer->insignificantbits); + size_t offset = (slot << (uint32_t)buffer->insignificantbits); for (size_t i = offset; i < buffer->counts[slot] + offset; i++) { xor_keyindex_t ki = buffer->buffer[i]; sets[ki.index].xormask ^= ki.hash; @@ -347,14 +348,14 @@ static inline void xor_buffered_decrement_counter(uint32_t index, uint64_t hash, xor_xorset_t *sets, xor_keyindex_t *Q, size_t *Qsize) { - uint32_t slot = index >> buffer->insignificantbits; - size_t addr = buffer->counts[slot] + (slot << buffer->insignificantbits); + uint32_t slot = index >> (uint32_t)buffer->insignificantbits; + size_t addr = buffer->counts[slot] + (slot << (uint32_t)buffer->insignificantbits); buffer->buffer[addr].index = index; buffer->buffer[addr].hash = hash; buffer->counts[slot]++; if (buffer->counts[slot] == buffer->slotsize) { size_t qsize = *Qsize; - size_t offset = (slot << buffer->insignificantbits); + size_t offset = (slot << (uint32_t)buffer->insignificantbits); for (size_t i = offset; i < buffer->counts[slot] + offset; i++) { xor_keyindex_t ki = buffer->buffer[i]; @@ -374,7 +375,7 @@ static inline void xor_buffered_decrement_counter(uint32_t index, uint64_t hash, static inline void xor_flush_increment_buffer(xor_setbuffer_t *buffer, xor_xorset_t *sets) { for (uint32_t slot = 0; slot < buffer->slotcount; slot++) { - size_t offset = (slot << buffer->insignificantbits); + size_t offset = (slot << (uint32_t)buffer->insignificantbits); for (size_t i = offset; i < buffer->counts[slot] + offset; i++) { xor_keyindex_t ki = buffer->buffer[i]; @@ -391,7 +392,7 @@ static inline void xor_flush_decrement_buffer(xor_setbuffer_t *buffer, size_t *Qsize) { size_t qsize = *Qsize; for (uint32_t slot = 0; slot < buffer->slotcount; slot++) { - uint32_t base = (slot << buffer->insignificantbits); + uint32_t base = (slot << (uint32_t)buffer->insignificantbits); for (size_t i = base; i < buffer->counts[slot] + base; i++) { xor_keyindex_t ki = buffer->buffer[i]; sets[ki.index].xormask ^= ki.hash; @@ -422,7 +423,7 @@ static inline uint32_t xor_flushone_decrement_buffer(xor_setbuffer_t *buffer, uint32_t slot = bestslot; size_t qsize = *Qsize; // for(uint32_t slot = 0; slot < buffer->slotcount; slot++) { - uint32_t base = (slot << buffer->insignificantbits); + uint32_t base = (slot << (uint32_t)buffer->insignificantbits); for (size_t i = base; i < buffer->counts[slot] + base; i++) { xor_keyindex_t ki = buffer->buffer[i]; sets[ki.index].xormask ^= ki.hash; @@ -491,7 +492,7 
@@ static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t while (true) { iterations ++; if(iterations == XOR_SORT_ITERATIONS) { - size = xor_sort_and_remove_dup(keys, size); + size = (uint32_t)xor_sort_and_remove_dup(keys, size); } if(iterations > XOR_MAX_ITERATIONS) { // The probability of this happening is lower than the @@ -522,7 +523,7 @@ static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t size_t Q0size = 0, Q1size = 0, Q2size = 0; for (size_t i = 0; i < filter->blockLength; i++) { if (sets0[i].count == 1) { - Q0[Q0size].index = i; + Q0[Q0size].index = (uint32_t)i; Q0[Q0size].hash = sets0[i].xormask; Q0size++; } @@ -530,14 +531,14 @@ static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t for (size_t i = 0; i < filter->blockLength; i++) { if (sets1[i].count == 1) { - Q1[Q1size].index = i; + Q1[Q1size].index = (uint32_t)i; Q1[Q1size].hash = sets1[i].xormask; Q1size++; } } for (size_t i = 0; i < filter->blockLength; i++) { if (sets2[i].count == 1) { - Q2[Q2size].index = i; + Q2[Q2size].index = (uint32_t)i; Q2[Q2size].hash = sets2[i].xormask; Q2size++; } @@ -548,7 +549,7 @@ static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t while (Q0size > 0) { xor_keyindex_t keyindex = Q0[--Q0size]; size_t index = keyindex.index; - xor_make_buffer_current(&buffer0, sets0, index, Q0, &Q0size); + xor_make_buffer_current(&buffer0, sets0, (uint32_t)index, Q0, &Q0size); if (sets0[index].count == 0) continue; // not actually possible after the initial scan. @@ -570,7 +571,7 @@ static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t while (Q1size > 0) { xor_keyindex_t keyindex = Q1[--Q1size]; size_t index = keyindex.index; - xor_make_buffer_current(&buffer1, sets1, index, Q1, &Q1size); + xor_make_buffer_current(&buffer1, sets1, (uint32_t)index, Q1, &Q1size); if (sets1[index].count == 0) continue; @@ -578,7 +579,7 @@ static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t uint64_t hash = keyindex.hash; uint32_t h0 = xor8_get_h0(hash, filter); uint32_t h2 = xor8_get_h2(hash, filter); - keyindex.index += blockLength; + keyindex.index += (uint32_t)blockLength; stack[stack_size] = keyindex; stack_size++; xor_buffered_decrement_counter(h0, hash, &buffer0, sets0, Q0, &Q0size); @@ -590,7 +591,7 @@ static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t while (Q2size > 0) { xor_keyindex_t keyindex = Q2[--Q2size]; size_t index = keyindex.index; - xor_make_buffer_current(&buffer2, sets2, index, Q2, &Q2size); + xor_make_buffer_current(&buffer2, sets2, (uint32_t)index, Q2, &Q2size); if (sets2[index].count == 0) continue; @@ -599,7 +600,7 @@ static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t uint32_t h0 = xor8_get_h0(hash, filter); uint32_t h1 = xor8_get_h1(hash, filter); - keyindex.index += 2 * blockLength; + keyindex.index += 2 * (uint32_t)blockLength; stack[stack_size] = keyindex; stack_size++; @@ -632,13 +633,13 @@ static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t xor_keyindex_t ki = stack[--stack_size]; uint64_t val = xor_fingerprint(ki.hash); if(ki.index < blockLength) { - val ^= fingerprints1[xor8_get_h1(ki.hash,filter)] ^ fingerprints2[xor8_get_h2(ki.hash,filter)]; + val ^= (uint32_t)fingerprints1[xor8_get_h1(ki.hash,filter)] ^ fingerprints2[xor8_get_h2(ki.hash,filter)]; } else if(ki.index < 2 * blockLength) { - val ^= fingerprints0[xor8_get_h0(ki.hash,filter)] ^ 
fingerprints2[xor8_get_h2(ki.hash,filter)]; + val ^= (uint32_t)fingerprints0[xor8_get_h0(ki.hash,filter)] ^ fingerprints2[xor8_get_h2(ki.hash,filter)]; } else { - val ^= fingerprints0[xor8_get_h0(ki.hash,filter)] ^ fingerprints1[xor8_get_h1(ki.hash,filter)]; + val ^= (uint32_t)fingerprints0[xor8_get_h0(ki.hash,filter)] ^ fingerprints1[xor8_get_h1(ki.hash,filter)]; } - filter->fingerprints[ki.index] = val; + filter->fingerprints[ki.index] = (uint8_t)val; } xor_free_buffer(&buffer0); xor_free_buffer(&buffer1); @@ -689,7 +690,7 @@ static inline bool xor8_populate(uint64_t *keys, uint32_t size, xor8_t *filter) while (true) { iterations ++; if(iterations == XOR_SORT_ITERATIONS) { - size = xor_sort_and_remove_dup(keys, size); + size = (uint32_t)xor_sort_and_remove_dup(keys, size); } if(iterations > XOR_MAX_ITERATIONS) { // The probability of this happening is lower than the @@ -716,7 +717,7 @@ static inline bool xor8_populate(uint64_t *keys, uint32_t size, xor8_t *filter) size_t Q0size = 0, Q1size = 0, Q2size = 0; for (size_t i = 0; i < filter->blockLength; i++) { if (sets0[i].count == 1) { - Q0[Q0size].index = i; + Q0[Q0size].index = (uint32_t)i; Q0[Q0size].hash = sets0[i].xormask; Q0size++; } @@ -724,14 +725,14 @@ static inline bool xor8_populate(uint64_t *keys, uint32_t size, xor8_t *filter) for (size_t i = 0; i < filter->blockLength; i++) { if (sets1[i].count == 1) { - Q1[Q1size].index = i; + Q1[Q1size].index = (uint32_t)i; Q1[Q1size].hash = sets1[i].xormask; Q1size++; } } for (size_t i = 0; i < filter->blockLength; i++) { if (sets2[i].count == 1) { - Q2[Q2size].index = i; + Q2[Q2size].index = (uint32_t)i; Q2[Q2size].hash = sets2[i].xormask; Q2size++; } @@ -775,7 +776,7 @@ static inline bool xor8_populate(uint64_t *keys, uint32_t size, xor8_t *filter) uint64_t hash = keyindex.hash; uint32_t h0 = xor8_get_h0(hash, filter); uint32_t h2 = xor8_get_h2(hash, filter); - keyindex.index += blockLength; + keyindex.index += (uint32_t)blockLength; stack[stack_size] = keyindex; stack_size++; sets0[h0].xormask ^= hash; @@ -804,7 +805,7 @@ static inline bool xor8_populate(uint64_t *keys, uint32_t size, xor8_t *filter) uint32_t h0 = xor8_get_h0(hash, filter); uint32_t h1 = xor8_get_h1(hash, filter); - keyindex.index += 2 * blockLength; + keyindex.index += 2 * (uint32_t)blockLength; stack[stack_size] = keyindex; stack_size++; @@ -841,13 +842,13 @@ static inline bool xor8_populate(uint64_t *keys, uint32_t size, xor8_t *filter) xor_keyindex_t ki = stack[--stack_size]; uint64_t val = xor_fingerprint(ki.hash); if(ki.index < blockLength) { - val ^= fingerprints1[xor8_get_h1(ki.hash,filter)] ^ fingerprints2[xor8_get_h2(ki.hash,filter)]; + val ^= (uint32_t)fingerprints1[xor8_get_h1(ki.hash,filter)] ^ fingerprints2[xor8_get_h2(ki.hash,filter)]; } else if(ki.index < 2 * blockLength) { - val ^= fingerprints0[xor8_get_h0(ki.hash,filter)] ^ fingerprints2[xor8_get_h2(ki.hash,filter)]; + val ^= (uint32_t)fingerprints0[xor8_get_h0(ki.hash,filter)] ^ fingerprints2[xor8_get_h2(ki.hash,filter)]; } else { - val ^= fingerprints0[xor8_get_h0(ki.hash,filter)] ^ fingerprints1[xor8_get_h1(ki.hash,filter)]; + val ^= (uint32_t)fingerprints0[xor8_get_h0(ki.hash,filter)] ^ fingerprints1[xor8_get_h1(ki.hash,filter)]; } - filter->fingerprints[ki.index] = val; + filter->fingerprints[ki.index] = (uint8_t)val; } free(sets); @@ -909,7 +910,7 @@ static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_ while (true) { iterations ++; if(iterations == XOR_SORT_ITERATIONS) { - size = xor_sort_and_remove_dup(keys, size); + 
size = (uint32_t)xor_sort_and_remove_dup(keys, size); } if(iterations > XOR_MAX_ITERATIONS) { // The probability of this happening is lower than the @@ -941,7 +942,7 @@ static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_ size_t Q0size = 0, Q1size = 0, Q2size = 0; for (size_t i = 0; i < filter->blockLength; i++) { if (sets0[i].count == 1) { - Q0[Q0size].index = i; + Q0[Q0size].index = (uint32_t)i; Q0[Q0size].hash = sets0[i].xormask; Q0size++; } @@ -949,14 +950,14 @@ static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_ for (size_t i = 0; i < filter->blockLength; i++) { if (sets1[i].count == 1) { - Q1[Q1size].index = i; + Q1[Q1size].index = (uint32_t)i; Q1[Q1size].hash = sets1[i].xormask; Q1size++; } } for (size_t i = 0; i < filter->blockLength; i++) { if (sets2[i].count == 1) { - Q2[Q2size].index = i; + Q2[Q2size].index = (uint32_t)i; Q2[Q2size].hash = sets2[i].xormask; Q2size++; } @@ -967,7 +968,7 @@ static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_ while (Q0size > 0) { xor_keyindex_t keyindex = Q0[--Q0size]; size_t index = keyindex.index; - xor_make_buffer_current(&buffer0, sets0, index, Q0, &Q0size); + xor_make_buffer_current(&buffer0, sets0, (uint32_t)index, Q0, &Q0size); if (sets0[index].count == 0) continue; // not actually possible after the initial scan. @@ -989,7 +990,7 @@ static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_ while (Q1size > 0) { xor_keyindex_t keyindex = Q1[--Q1size]; size_t index = keyindex.index; - xor_make_buffer_current(&buffer1, sets1, index, Q1, &Q1size); + xor_make_buffer_current(&buffer1, sets1, (uint32_t)index, Q1, &Q1size); if (sets1[index].count == 0) continue; @@ -997,7 +998,7 @@ static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_ uint64_t hash = keyindex.hash; uint32_t h0 = xor16_get_h0(hash, filter); uint32_t h2 = xor16_get_h2(hash, filter); - keyindex.index += blockLength; + keyindex.index += (uint32_t)blockLength; stack[stack_size] = keyindex; stack_size++; xor_buffered_decrement_counter(h0, hash, &buffer0, sets0, Q0, &Q0size); @@ -1009,7 +1010,7 @@ static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_ while (Q2size > 0) { xor_keyindex_t keyindex = Q2[--Q2size]; size_t index = keyindex.index; - xor_make_buffer_current(&buffer2, sets2, index, Q2, &Q2size); + xor_make_buffer_current(&buffer2, sets2, (uint32_t)index, Q2, &Q2size); if (sets2[index].count == 0) continue; @@ -1018,7 +1019,7 @@ static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_ uint32_t h0 = xor16_get_h0(hash, filter); uint32_t h1 = xor16_get_h1(hash, filter); - keyindex.index += 2 * blockLength; + keyindex.index += 2 * (uint32_t)blockLength; stack[stack_size] = keyindex; stack_size++; @@ -1051,13 +1052,13 @@ static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_ xor_keyindex_t ki = stack[--stack_size]; uint64_t val = xor_fingerprint(ki.hash); if(ki.index < blockLength) { - val ^= fingerprints1[xor16_get_h1(ki.hash,filter)] ^ fingerprints2[xor16_get_h2(ki.hash,filter)]; + val ^= (uint32_t)fingerprints1[xor16_get_h1(ki.hash,filter)] ^ fingerprints2[xor16_get_h2(ki.hash,filter)]; } else if(ki.index < 2 * blockLength) { - val ^= fingerprints0[xor16_get_h0(ki.hash,filter)] ^ fingerprints2[xor16_get_h2(ki.hash,filter)]; + val ^= (uint32_t)fingerprints0[xor16_get_h0(ki.hash,filter)] ^ fingerprints2[xor16_get_h2(ki.hash,filter)]; } else { - val ^= 
fingerprints0[xor16_get_h0(ki.hash,filter)] ^ fingerprints1[xor16_get_h1(ki.hash,filter)]; + val ^= (uint32_t)fingerprints0[xor16_get_h0(ki.hash,filter)] ^ fingerprints1[xor16_get_h1(ki.hash,filter)]; } - filter->fingerprints[ki.index] = val; + filter->fingerprints[ki.index] = (uint16_t)val; } xor_free_buffer(&buffer0); xor_free_buffer(&buffer1); @@ -1111,7 +1112,7 @@ static inline bool xor16_populate(uint64_t *keys, uint32_t size, xor16_t *filter while (true) { iterations ++; if(iterations == XOR_SORT_ITERATIONS) { - size = xor_sort_and_remove_dup(keys, size); + size = (uint32_t)xor_sort_and_remove_dup(keys, size); } if(iterations > XOR_MAX_ITERATIONS) { // The probability of this happening is lower than the @@ -1138,7 +1139,7 @@ static inline bool xor16_populate(uint64_t *keys, uint32_t size, xor16_t *filter size_t Q0size = 0, Q1size = 0, Q2size = 0; for (size_t i = 0; i < filter->blockLength; i++) { if (sets0[i].count == 1) { - Q0[Q0size].index = i; + Q0[Q0size].index = (uint32_t)i; Q0[Q0size].hash = sets0[i].xormask; Q0size++; } @@ -1146,14 +1147,14 @@ static inline bool xor16_populate(uint64_t *keys, uint32_t size, xor16_t *filter for (size_t i = 0; i < filter->blockLength; i++) { if (sets1[i].count == 1) { - Q1[Q1size].index = i; + Q1[Q1size].index = (uint32_t)i; Q1[Q1size].hash = sets1[i].xormask; Q1size++; } } for (size_t i = 0; i < filter->blockLength; i++) { if (sets2[i].count == 1) { - Q2[Q2size].index = i; + Q2[Q2size].index = (uint32_t)i; Q2[Q2size].hash = sets2[i].xormask; Q2size++; } @@ -1197,7 +1198,7 @@ static inline bool xor16_populate(uint64_t *keys, uint32_t size, xor16_t *filter uint64_t hash = keyindex.hash; uint32_t h0 = xor16_get_h0(hash, filter); uint32_t h2 = xor16_get_h2(hash, filter); - keyindex.index += blockLength; + keyindex.index += (uint32_t)blockLength; stack[stack_size] = keyindex; stack_size++; sets0[h0].xormask ^= hash; @@ -1226,7 +1227,7 @@ static inline bool xor16_populate(uint64_t *keys, uint32_t size, xor16_t *filter uint32_t h0 = xor16_get_h0(hash, filter); uint32_t h1 = xor16_get_h1(hash, filter); - keyindex.index += 2 * blockLength; + keyindex.index += 2 * (uint32_t)blockLength; stack[stack_size] = keyindex; stack_size++; @@ -1263,13 +1264,13 @@ static inline bool xor16_populate(uint64_t *keys, uint32_t size, xor16_t *filter xor_keyindex_t ki = stack[--stack_size]; uint64_t val = xor_fingerprint(ki.hash); if(ki.index < blockLength) { - val ^= fingerprints1[xor16_get_h1(ki.hash,filter)] ^ fingerprints2[xor16_get_h2(ki.hash,filter)]; + val ^= (uint32_t)fingerprints1[xor16_get_h1(ki.hash,filter)] ^ fingerprints2[xor16_get_h2(ki.hash,filter)]; } else if(ki.index < 2 * blockLength) { - val ^= fingerprints0[xor16_get_h0(ki.hash,filter)] ^ fingerprints2[xor16_get_h2(ki.hash,filter)]; + val ^= (uint32_t)fingerprints0[xor16_get_h0(ki.hash,filter)] ^ fingerprints2[xor16_get_h2(ki.hash,filter)]; } else { - val ^= fingerprints0[xor16_get_h0(ki.hash,filter)] ^ fingerprints1[xor16_get_h1(ki.hash,filter)]; + val ^= (uint32_t)fingerprints0[xor16_get_h0(ki.hash,filter)] ^ fingerprints1[xor16_get_h1(ki.hash,filter)]; } - filter->fingerprints[ki.index] = val; + filter->fingerprints[ki.index] = (uint16_t)val; } free(sets); diff --git a/tests/unit.c b/tests/unit.c index e5c2a62..3a30b96 100644 --- a/tests/unit.c +++ b/tests/unit.c @@ -2,400 +2,242 @@ #include "xorfilter.h" #include -bool testbufferedxor8(size_t size) { - printf("testing buffered xor8\n"); - - xor8_t filter; - xor8_allocate(size, &filter); +// generic function dispatch + +bool 
gen_xor8_allocate(uint32_t size, void *filter) { return xor8_allocate(size, filter); } +bool gen_xor16_allocate(uint32_t size, void *filter) { return xor16_allocate(size, filter); } +bool gen_binary_fuse8_allocate(uint32_t size, void *filter) { return binary_fuse8_allocate(size, filter); } +bool gen_binary_fuse16_allocate(uint32_t size, void *filter) { return binary_fuse16_allocate(size, filter); } + +void gen_xor8_free(void *filter) { xor8_free(filter); } +void gen_xor16_free(void *filter) { xor16_free(filter); } +void gen_binary_fuse8_free(void *filter) { binary_fuse8_free(filter); } +void gen_binary_fuse16_free(void *filter) { binary_fuse16_free(filter); } + +size_t gen_xor8_size_in_bytes(const void *filter) { return xor8_size_in_bytes(filter); } +size_t gen_xor16_size_in_bytes(const void *filter) { return xor16_size_in_bytes(filter); } +size_t gen_binary_fuse8_size_in_bytes(const void *filter) { return binary_fuse8_size_in_bytes(filter); } +size_t gen_binary_fuse16_size_in_bytes(const void *filter) { return binary_fuse16_size_in_bytes(filter); } + +size_t gen_xor8_serialization_bytes(void *filter) { return xor8_serialization_bytes(filter); } +size_t gen_xor16_serialization_bytes(void *filter) { return xor16_serialization_bytes(filter); } +size_t gen_binary_fuse8_serialization_bytes(void *filter) { return binary_fuse8_serialization_bytes(filter); } +size_t gen_binary_fuse16_serialization_bytes(void *filter) { return binary_fuse16_serialization_bytes(filter); } + +void gen_xor8_serialize(void *filter, char *buffer) { xor8_serialize(filter, buffer); } +void gen_xor16_serialize(void *filter, char *buffer) { xor16_serialize(filter, buffer); } +void gen_binary_fuse8_serialize(void *filter, char *buffer) { binary_fuse8_serialize(filter, buffer); } +void gen_binary_fuse16_serialize(void *filter, char *buffer) { binary_fuse16_serialize(filter, buffer); } + +bool gen_xor8_deserialize(void *filter, const char *buffer) { return xor8_deserialize(filter, buffer); } +bool gen_xor16_deserialize(void *filter, const char *buffer) { return xor16_deserialize(filter, buffer); } +bool gen_binary_fuse8_deserialize(void *filter, const char *buffer) { return binary_fuse8_deserialize(filter, buffer); } +bool gen_binary_fuse16_deserialize(void *filter, const char *buffer) { return binary_fuse16_deserialize(filter, buffer); } + +bool gen_xor8_populate(uint64_t *keys, uint32_t size, void *filter) { return xor8_populate(keys, size, filter); } +bool gen_xor8_buffered_populate(uint64_t *keys, uint32_t size, void *filter) { return xor8_buffered_populate(keys, size, filter); } +bool gen_xor16_populate(uint64_t *keys, uint32_t size, void *filter) { return xor16_populate(keys, size, filter); } +bool gen_xor16_buffered_populate(uint64_t *keys, uint32_t size, void *filter) { return xor16_buffered_populate(keys, size, filter); } +bool gen_binary_fuse8_populate(uint64_t *keys, uint32_t size, void *filter) { return binary_fuse8_populate(keys, size, filter); } +bool gen_binary_fuse16_populate(uint64_t *keys, uint32_t size, void *filter) { return binary_fuse16_populate(keys, size, filter); } + +bool gen_xor8_contain(uint64_t key, const void *filter) { return xor8_contain(key, filter); } +bool gen_xor16_contain(uint64_t key, const void *filter) { return xor16_contain(key, filter); } +bool gen_binary_fuse8_contain(uint64_t key, const void *filter) { return binary_fuse8_contain(key, filter); } +bool gen_binary_fuse16_contain(uint64_t key, const void *filter) { return binary_fuse16_contain(key, filter); } + +typedef bool 
(*allocate_fpt)(uint32_t size, void *filter); +typedef void (*free_fpt)(void *filter); +typedef size_t (*size_in_bytes_fpt)(const void *filter); +typedef size_t (*serialization_bytes_fpt)(void *filter); +typedef void (*serialize_fpt)(void *filter, char *buffer); +typedef bool (*deserialize_fpt)(void *filter, const char *buffer); +typedef bool (*populate_fpt)(uint64_t *keys, uint32_t size, void *filter); +typedef bool (*contain_fpt)(uint64_t key, const void *filter); + +// generic test runner + +bool test(size_t size, size_t repeated_size, void *filter, + allocate_fpt allocate, + free_fpt free_filter, + size_in_bytes_fpt size_in_bytes, + serialization_bytes_fpt serialization_bytes, + serialize_fpt serialize, + deserialize_fpt deserialize, + populate_fpt populate, + contain_fpt contain) { + allocate((uint32_t)size, filter); // we need some set of values uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); - for (size_t i = 0; i < size; i++) { + for (size_t i = 0; i < size - repeated_size; i++) { big_set[i] = i; // we use contiguous values } - // we construct the filter - if(!xor8_buffered_populate(big_set, size, &filter)) { return false; } - for (size_t i = 0; i < size; i++) { - if (!xor8_contain(big_set[i], &filter)) { - printf("bug!\n"); - return false; - } - } - - size_t random_matches = 0; - size_t trials = 10000000; - for (size_t i = 0; i < trials; i++) { - uint64_t random_key = ((uint64_t)rand() << 32) + rand(); - if (xor8_contain(random_key, &filter)) { - if (random_key >= size) { - random_matches++; - } - } - } - double fpp = random_matches * 1.0 / trials; - printf(" fpp %3.5f (estimated) \n", fpp); - double bpe = xor8_size_in_bytes(&filter) * 8.0 / size; - printf(" bits per entry %3.2f\n", bpe); - printf(" bits per entry %3.2f (theoretical lower bound)\n", - log(fpp)/log(2)); - printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2))); - - xor8_free(&filter); - free(big_set); - return true; -} - - -bool testxor8(size_t size) { - printf("testing xor8\n"); - - xor8_t filter; - xor8_allocate(size, &filter); - // we need some set of values - uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); - for (size_t i = 0; i < size; i++) { - big_set[i] = i; // we use contiguous values + for (size_t i = 0; i < repeated_size; i++) { + big_set[size - i - 1] = i; // we use contiguous values } // we construct the filter - if(!xor8_populate(big_set, size, &filter)) { return false; } + if(!populate(big_set, (uint32_t)size, filter)) { return false; } for (size_t i = 0; i < size; i++) { - if (!xor8_contain(big_set[i], &filter)) { + if (!contain(big_set[i], filter)) { printf("bug!\n"); return false; } } - size_t buffer_size = xor8_serialization_bytes(&filter); + size_t buffer_size = serialization_bytes(filter); char *buffer = (char*)malloc(buffer_size); - xor8_serialize(&filter, buffer); - xor8_free(&filter); - xor8_deserialize(&filter, buffer); + serialize(filter, buffer); + free_filter(filter); + deserialize(filter, buffer); free(buffer); for (size_t i = 0; i < size; i++) { - if (!xor8_contain(big_set[i], &filter)) { + if (!contain(big_set[i], filter)) { printf("bug!\n"); return false; } } - + size_t random_matches = 0; size_t trials = 10000000; for (size_t i = 0; i < trials; i++) { - uint64_t random_key = ((uint64_t)rand() << 32) + rand(); - if (xor8_contain(random_key, &filter)) { + uint64_t random_key = ((uint64_t)rand() << 32U) + (uint64_t)rand(); + if (contain(random_key, filter)) { if (random_key >= size) { random_matches++; } } } - double fpp = random_matches * 1.0 / 
trials; + double fpp = (double)random_matches * 1.0 / (double)trials; printf(" fpp %3.5f (estimated) \n", fpp); - double bpe = xor8_size_in_bytes(&filter) * 8.0 / size; + double bpe = (double)size_in_bytes(filter) * 8.0 / (double)size; printf(" bits per entry %3.2f\n", bpe); printf(" bits per entry %3.2f (theoretical lower bound)\n", - log(fpp)/log(2)); printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2))); - xor8_free(&filter); + free_filter(filter); free(big_set); return true; } -bool testxor16(size_t size) { - printf("testing xor16\n"); - xor16_t filter; - xor16_allocate(size, &filter); - // we need some set of values - uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); - for (size_t i = 0; i < size; i++) { - big_set[i] = i; // we use contiguous values - } - // we construct the filter - if(!xor16_populate(big_set, size, &filter)) { return false; } - for (size_t i = 0; i < size; i++) { - if (!xor16_contain(big_set[i], &filter)) { - printf("bug!\n"); - return false; - } - } +bool testbufferedxor8(size_t size) { + printf("testing buffered xor8\n"); + xor8_t filter = {0}; // zero initialisation silences unitialized warning + return test(size, 0, &filter, + gen_xor8_allocate, + gen_xor8_free, + gen_xor8_size_in_bytes, + gen_xor8_serialization_bytes, + gen_xor8_serialize, + gen_xor8_deserialize, + gen_xor8_buffered_populate, + gen_xor8_contain); +} - size_t buffer_size = xor16_serialization_bytes(&filter); - char *buffer = (char*)malloc(buffer_size); - xor16_serialize(&filter, buffer); - xor16_free(&filter); - xor16_deserialize(&filter, buffer); - free(buffer); - for (size_t i = 0; i < size; i++) { - if (!xor16_contain(big_set[i], &filter)) { - printf("bug!\n"); - return false; - } - } - size_t random_matches = 0; - size_t trials = 10000000; - for (size_t i = 0; i < trials; i++) { - uint64_t random_key = ((uint64_t)rand() << 32) + rand(); - if (xor16_contain(random_key, &filter)) { - if (random_key >= size) { - random_matches++; - } - } - } - double fpp = random_matches * 1.0 / trials; - printf(" fpp %3.5f (estimated) \n", fpp); - double bpe = xor16_size_in_bytes(&filter) * 8.0 / size; - printf(" bits per entry %3.2f\n", bpe); - printf(" bits per entry %3.2f (theoretical lower bound)\n", - log(fpp)/log(2)); - printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2))); - xor16_free(&filter); - free(big_set); - return true; +bool testxor8(size_t size) { + printf("testing xor8\n"); + + xor8_t filter = {0}; // zero initialisation silences unitialized warning + return test(size, 0, &filter, + gen_xor8_allocate, + gen_xor8_free, + gen_xor8_size_in_bytes, + gen_xor8_serialization_bytes, + gen_xor8_serialize, + gen_xor8_deserialize, + gen_xor8_populate, + gen_xor8_contain); } +bool testxor16(size_t size) { + printf("testing xor16\n"); + xor16_t filter = {0}; // zero initialisation silences unitialized warning + return test(size, 0, &filter, + gen_xor16_allocate, + gen_xor16_free, + gen_xor16_size_in_bytes, + gen_xor16_serialization_bytes, + gen_xor16_serialize, + gen_xor16_deserialize, + gen_xor16_populate, + gen_xor16_contain); +} -bool testbufferedxor16(size_t size) { - printf("testing buffered xor16\n"); - xor16_t filter; - xor16_allocate(size, &filter); - // we need some set of values - uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); - for (size_t i = 0; i < size; i++) { - big_set[i] = i; // we use contiguous values - } - // we construct the filter - if(!xor16_buffered_populate(big_set, size, &filter)) { return false; } - for (size_t i = 0; i < size; 
-    if (!xor16_contain(big_set[i], &filter)) {
-      printf("bug!\n");
-      return false;
-    }
-  }
-  size_t buffer_size = xor16_serialization_bytes(&filter);
-  char *buffer = (char*)malloc(buffer_size);
-  xor16_serialize(&filter, buffer);
-  xor16_free(&filter);
-  xor16_deserialize(&filter, buffer);
-  free(buffer);
-  for (size_t i = 0; i < size; i++) {
-    if (!xor16_contain(big_set[i], &filter)) {
-      printf("bug!\n");
-      return false;
-    }
-  }
-  size_t random_matches = 0;
-  size_t trials = 10000000;
-  for (size_t i = 0; i < trials; i++) {
-    uint64_t random_key = ((uint64_t)rand() << 32) + rand();
-    if (xor16_contain(random_key, &filter)) {
-      if (random_key >= size) {
-        random_matches++;
-      }
-    }
-  }
-  double fpp = random_matches * 1.0 / trials;
-  printf(" fpp %3.5f (estimated) \n", fpp);
-  double bpe = xor16_size_in_bytes(&filter) * 8.0 / size;
-  printf(" bits per entry %3.2f\n", bpe);
-  printf(" bits per entry %3.2f (theoretical lower bound)\n",
-         - log(fpp)/log(2));
-  printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2)));
-  xor16_free(&filter);
-  free(big_set);
-  return true;
+bool testbufferedxor16(size_t size) {
+  printf("testing buffered xor16\n");
+  xor16_t filter = {0}; // zero initialisation silences uninitialized warning
+  return test(size, 0, &filter,
+              gen_xor16_allocate,
+              gen_xor16_free,
+              gen_xor16_size_in_bytes,
+              gen_xor16_serialization_bytes,
+              gen_xor16_serialize,
+              gen_xor16_deserialize,
+              gen_xor16_buffered_populate,
+              gen_xor16_contain);
 }
 bool testbinaryfuse8(size_t size) {
   printf("testing binary fuse8 with size %zu\n", size);
-  binary_fuse8_t filter;
-  binary_fuse8_allocate(size, &filter);
-  // we need some set of values
-  uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size);
-  for (size_t i = 0; i < size; i++) {
-    big_set[i] = i; // we use contiguous values
-  }
-  // we construct the filter
-  if(!binary_fuse8_populate(big_set, size, &filter)) { printf("failure to populate\n"); return false; }
-  for (size_t i = 0; i < size; i++) {
-    if (!binary_fuse8_contain(big_set[i], &filter)) {
-      printf("bug!\n");
-      return false;
-    }
-  }
-
-  size_t buffer_size = binary_fuse8_serialization_bytes(&filter);
-  char *buffer = (char*)malloc(buffer_size);
-  binary_fuse8_serialize(&filter, buffer);
-  binary_fuse8_free(&filter);
-  binary_fuse8_deserialize(&filter, buffer);
-  free(buffer);
-  for (size_t i = 0; i < size; i++) {
-    if (!binary_fuse8_contain(big_set[i], &filter)) {
-      printf("bug!\n");
-      return false;
-    }
-  }
-
-  size_t random_matches = 0;
-  size_t trials = 10000000;
-  for (size_t i = 0; i < trials; i++) {
-    uint64_t random_key = ((uint64_t)rand() << 32) + rand();
-    if (binary_fuse8_contain(random_key, &filter)) {
-      if (random_key >= size) {
-        random_matches++;
-      }
-    }
-  }
-  double fpp = random_matches * 1.0 / trials;
-  printf(" fpp %3.5f (estimated) \n", fpp);
-  double bpe = binary_fuse8_size_in_bytes(&filter) * 8.0 / size;
-  printf(" bits per entry %3.2f\n", bpe);
-  printf(" bits per entry %3.2f (theoretical lower bound)\n",
-         - log(fpp)/log(2));
-  printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2)));
-  binary_fuse8_free(&filter);
-  free(big_set);
-  return true;
+  binary_fuse8_t filter = {0}; // zero initialisation silences uninitialized warning
+  return test(size, 0, &filter,
+              gen_binary_fuse8_allocate,
+              gen_binary_fuse8_free,
+              gen_binary_fuse8_size_in_bytes,
+              gen_binary_fuse8_serialization_bytes,
+              gen_binary_fuse8_serialize,
+              gen_binary_fuse8_deserialize,
+              gen_binary_fuse8_populate,
+              gen_binary_fuse8_contain);
 }
 bool testbinaryfuse16(size_t size) {
   printf("testing binary fuse16\n");
-  binary_fuse16_t filter;
-  binary_fuse16_allocate(size, &filter);
-  // we need some set of values
-  uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size);
-  for (size_t i = 0; i < size; i++) {
-    big_set[i] = i; // we use contiguous values
-  }
-  // we construct the filter
-  if(!binary_fuse16_populate(big_set, size, &filter)) { printf("failure to populate\n"); return false; }
-  for (size_t i = 0; i < size; i++) {
-    if (!binary_fuse16_contain(big_set[i], &filter)) {
-      printf("bug!\n");
-      return false;
-    }
-  }
-
-  size_t buffer_size = binary_fuse16_serialization_bytes(&filter);
-  char *buffer = (char*)malloc(buffer_size);
-  binary_fuse16_serialize(&filter, buffer);
-  binary_fuse16_free(&filter);
-  binary_fuse16_deserialize(&filter, buffer);
-  free(buffer);
-  for (size_t i = 0; i < size; i++) {
-    if (!binary_fuse16_contain(big_set[i], &filter)) {
-      printf("bug!\n");
-      return false;
-    }
-  }
-
-  size_t random_matches = 0;
-  size_t trials = 10000000;
-  for (size_t i = 0; i < trials; i++) {
-    uint64_t random_key = ((uint64_t)rand() << 32) + rand();
-    if (binary_fuse16_contain(random_key, &filter)) {
-      if (random_key >= size) {
-        random_matches++;
-      }
-    }
-  }
-  double fpp = random_matches * 1.0 / trials;
-  printf(" fpp %3.5f (estimated) \n", fpp);
-  double bpe = binary_fuse16_size_in_bytes(&filter) * 8.0 / size;
-  printf(" bits per entry %3.2f\n", bpe);
-  printf(" bits per entry %3.2f (theoretical lower bound)\n",
-         - log(fpp)/log(2));
-  printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2)));
-  binary_fuse16_free(&filter);
-  free(big_set);
-  return true;
+  binary_fuse16_t filter = {0}; // zero initialisation silences uninitialized warning
+  return test(size, 0, &filter,
+              gen_binary_fuse16_allocate,
+              gen_binary_fuse16_free,
+              gen_binary_fuse16_size_in_bytes,
+              gen_binary_fuse16_serialization_bytes,
+              gen_binary_fuse16_serialize,
+              gen_binary_fuse16_deserialize,
+              gen_binary_fuse16_populate,
+              gen_binary_fuse16_contain);
 }
 bool testbinaryfuse8_dup(size_t size) {
   printf("testing binary fuse8 with duplicates\n");
-  binary_fuse8_t filter;
-  binary_fuse8_allocate(size, &filter);
-  // we need some set of values
-  uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size);
-  size_t repeated_size = 10;
-  for (size_t i = 0; i < size - repeated_size; i++) {
-    big_set[i] = i; // we use contiguous values
-  }
-  for (size_t i = 0; i < repeated_size; i++) {
-    big_set[size - i - 1] = i; // we use contiguous values
-  }
-  // we construct the filter
-  if(!binary_fuse8_populate(big_set, size, &filter)) { return false; }
-  for (size_t i = 0; i < size; i++) {
-    if (!binary_fuse8_contain(big_set[i], &filter)) {
-      printf("bug!\n");
-      return false;
-    }
-  }
-
-  size_t random_matches = 0;
-  size_t trials = 10000000;
-  for (size_t i = 0; i < trials; i++) {
-    uint64_t random_key = ((uint64_t)rand() << 32) + rand();
-    if (binary_fuse8_contain(random_key, &filter)) {
-      if (random_key >= size) {
-        random_matches++;
-      }
-    }
-  }
-  double fpp = random_matches * 1.0 / trials;
-  printf(" fpp %3.5f (estimated) \n", fpp);
-  double bpe = binary_fuse8_size_in_bytes(&filter) * 8.0 / size;
-  printf(" bits per entry %3.2f\n", bpe);
-  printf(" bits per entry %3.2f (theoretical lower bound)\n",
-         - log(fpp)/log(2));
-  printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2)));
-  binary_fuse8_free(&filter);
-  free(big_set);
-  return true;
+  binary_fuse8_t filter = {0}; // zero initialisation silences uninitialized warning
+  return test(size, 10, &filter,
+              gen_binary_fuse8_allocate,
+              gen_binary_fuse8_free,
+              gen_binary_fuse8_size_in_bytes,
+              gen_binary_fuse8_serialization_bytes,
+              gen_binary_fuse8_serialize,
+              gen_binary_fuse8_deserialize,
+              gen_binary_fuse8_populate,
+              gen_binary_fuse8_contain);
 }
 bool testbinaryfuse16_dup(size_t size) {
   printf("testing binary fuse16 with duplicates\n");
-  binary_fuse16_t filter;
-  binary_fuse16_allocate(size, &filter);
-  // we need some set of values
-  uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size);
-  size_t repeated_size = 10;
-  for (size_t i = 0; i < size - repeated_size; i++) {
-    big_set[i] = i; // we use contiguous values
-  }
-  for (size_t i = 0; i < repeated_size; i++) {
-    big_set[size - i - 1] = i; // we use contiguous values
-  }
-  // we construct the filter
-  if(!binary_fuse16_populate(big_set, size, &filter)) { return false; }
-  for (size_t i = 0; i < size; i++) {
-    if (!binary_fuse16_contain(big_set[i], &filter)) {
-      printf("bug!\n");
-      return false;
-    }
-  }
-
-  size_t random_matches = 0;
-  size_t trials = 10000000;
-  for (size_t i = 0; i < trials; i++) {
-    uint64_t random_key = ((uint64_t)rand() << 32) + rand();
-    if (binary_fuse16_contain(random_key, &filter)) {
-      if (random_key >= size) {
-        random_matches++;
-      }
-    }
-  }
-  double fpp = random_matches * 1.0 / trials;
-  printf(" fpp %3.5f (estimated) \n", fpp);
-  double bpe = binary_fuse16_size_in_bytes(&filter) * 8.0 / size;
-  printf(" bits per entry %3.2f\n", bpe);
-  printf(" bits per entry %3.2f (theoretical lower bound)\n",
-         - log(fpp)/log(2));
-  printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2)));
-  binary_fuse16_free(&filter);
-  free(big_set);
-  return true;
+  binary_fuse16_t filter = {0}; // zero initialisation silences uninitialized warning
+  return test(size, 10, &filter,
+              gen_binary_fuse16_allocate,
+              gen_binary_fuse16_free,
+              gen_binary_fuse16_size_in_bytes,
+              gen_binary_fuse16_serialization_bytes,
+              gen_binary_fuse16_serialize,
+              gen_binary_fuse16_deserialize,
+              gen_binary_fuse16_populate,
+              gen_binary_fuse16_contain);
 }
 void failure_rate_binary_fuse16() {
@@ -404,19 +246,20 @@ void failure_rate_binary_fuse16() {
   size_t size = 5000;
   uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size);
   binary_fuse16_t filter;
-  binary_fuse16_allocate(size, &filter);
+  binary_fuse16_allocate((uint32_t)size, &filter);
   size_t failure = 0;
   size_t total_trials = 1000000;
   for(size_t trial = 0; trial <= 1000; trial++) {
     for (size_t i = 0; i < size; i++) {
-      big_set[i] = rand() + (((uint64_t) rand()) << 32);
+      big_set[i] = (uint64_t)rand() + (((uint64_t) rand()) << 32U);
     }
-    if(!binary_fuse16_populate(big_set, size, &filter)) {
+    if(!binary_fuse16_populate(big_set, (uint32_t)size, &filter)) {
       failure++;
     }
   }
   printf("failures %zu out of %zu\n\n", failure, total_trials);
+  binary_fuse16_free(&filter);
   free(big_set);
 }
@@ -450,4 +293,4 @@ int main() {
   if(!testbinaryfuse16(0)) { abort(); }
   if(!testbinaryfuse16(1)) { abort(); }
   if(!testbinaryfuse16(2)) { abort(); }
-}
\ No newline at end of file
+}
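
Note on the gen_* adapters: the generic test() runner above takes a void* filter plus a table of function pointers, so one routine can exercise xor8, xor16, and both binary fuse variants, including the serialize/free/deserialize round trip. The gen_xor8_*, gen_xor16_*, and gen_binary_fuse*_* functions it receives are defined earlier in tests/unit.c and are not shown in this excerpt. Purely as an illustrative sketch, and assuming they are thin cast-through wrappers (a hypothetical shape, not copied from the patch), two of them might look like this:

// Hypothetical sketch only -- not part of the patch above. It assumes the
// gen_xor8_* adapters simply recover the concrete xor8_t* from the generic
// void* so the xor8 API matches the function-pointer typedefs used by test().
static void gen_xor8_allocate(uint32_t size, void *filter) {
  xor8_allocate(size, (xor8_t *)filter);
}

static bool gen_xor8_contain(uint64_t key, const void *filter) {
  return xor8_contain(key, (const xor8_t *)filter);
}

Whatever their exact form, routing every filter type through the same table is what lets the duplicated per-type test bodies removed in this patch collapse into a single runner.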