From 890b31d7b3c9ad3dfa3911882db13058bf6a7e2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20Sch=C3=B6nrock?= Date: Fri, 13 Dec 2024 18:54:26 +0000 Subject: [PATCH] eliminating Warnings (#64) * first cut at reducing warnings binaryfusefilter.h only issues addressed 1. changed some return value and parameter types of (static) functions -- PLEASE CHECK THIS IN REVIEW 2. sprinkled 'U' into bitwise operations to silence warnings 3. casting to avoid "standard integer promotion rules" which resulted in signedness warnings 4. explicitly reducing results to the target type rather than letting it happen implicitly tests still passing * first cut at reducing warnings binaryfusefilter.h only issues addressed 1. changed some return value and parameter types of (static) functions -- PLEASE CHECK THIS IN REVIEW 2. sprinkled 'U' into bitwise operations to silence warnings 3. casting to avoid "standard integer promotion rules" which resulted in signedness warnings 4. explicitly reducing results to the target type rather than letting it happen implicitly 5. when and `if` statements ends in break or return, then a following `else if` can be just a new `if` tests still passing * starting work on xofilter.h * binclude/binaryfusefilter.h apparently clean for first time * formatting * first cut on xofilter.h mostly casting size_t down to uint32_t - maybe some internal struct types should have been size_t? also some integer promotion casts * round2 on xorfilter.h mostly casting blocklengt to uint32_t to fit into keyindex.index should keyindex.index be a size_t? * bench.c and unit.c very repetitive casting of mainly sizes and doubles. * all silent now on a clean compile with -Wconversion and -Wsign-conversion so putting these in the Makefile, so during "private" development with the Makefile new warnings will be noticed straight away but not in CMakeLists.txt, because as this is a header-only INTERFACE library, it would force these warning levels on the users. 
* another sweep from including c++ project turned up these additional 'U' tweaks * mistaken cast which broke test * factoring out the report functionality all sections were indentical except for the call to *contain() and *size_in_bytes some void* and function pointer juggling allowed to make this generic report code reduced by 2/3rds * iron out slight inconsistencies between tests * abstracting away the rest of the test logic for all but the special "failure rate test" the large function dispatch table is a litle annoying, but can be removed as well...TBC tests all pass * fixing a memory leak caught by sanitizer just a missing free() --- Makefile | 6 +- benchmarks/bench.c | 36 +-- include/binaryfusefilter.h | 140 +++++----- include/xorfilter.h | 207 +++++++-------- tests/unit.c | 511 +++++++++++++------------------------ 5 files changed, 374 insertions(+), 526 deletions(-) diff --git a/Makefile b/Makefile index 81a33c4..8eb4843 100644 --- a/Makefile +++ b/Makefile @@ -1,14 +1,14 @@ all: unit bench unit : tests/unit.c include/xorfilter.h include/binaryfusefilter.h - cc -std=c99 -O3 -o unit tests/unit.c -lm -Iinclude -Wall -Wextra -Wshadow -Wcast-qual + cc -std=c99 -O3 -o unit tests/unit.c -lm -Iinclude -Wall -Wextra -Wshadow -Wcast-qual -Wconversion -Wsign-conversion ab : tests/a.c tests/b.c - cc -std=c99 -o c tests/a.c tests/b.c -lm -Iinclude -Wall -Wextra -Wshadow -Wcast-qual + cc -std=c99 -o c tests/a.c tests/b.c -lm -Iinclude -Wall -Wextra -Wshadow -Wcast-qual -Wconversion -Wsign-conversion bench : benchmarks/bench.c include/xorfilter.h include/binaryfusefilter.h - cc -std=c99 -O3 -o bench benchmarks/bench.c -lm -Iinclude -Wall -Wextra -Wshadow -Wcast-qual + cc -std=c99 -O3 -o bench benchmarks/bench.c -lm -Iinclude -Wall -Wextra -Wshadow -Wcast-qual -Wconversion -Wsign-conversion test: unit ab ./unit diff --git a/benchmarks/bench.c b/benchmarks/bench.c index 93734d7..653d42a 100644 --- a/benchmarks/bench.c +++ b/benchmarks/bench.c @@ -9,19 +9,19 @@ bool 
testxor8(size_t size) { xor8_t filter; - xor8_allocate(size, &filter); + xor8_allocate((uint32_t)size, &filter); // we need some set of values uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); for (size_t i = 0; i < size; i++) { big_set[i] = i; // we use contiguous values } // we construct the filter - bool constructed = xor8_populate(big_set, size, &filter); // warm the cache + bool constructed = xor8_populate(big_set, (uint32_t)size, &filter); // warm the cache if(!constructed) { return false; } for (size_t times = 0; times < 5; times++) { clock_t t; t = clock(); - xor8_populate(big_set, size, &filter); + xor8_populate(big_set, (uint32_t)size, &filter); t = clock() - t; double time_taken = ((double)t) / CLOCKS_PER_SEC; // in seconds printf("It took %f seconds to build an index over %zu values. \n", @@ -37,19 +37,19 @@ bool testbufferedxor8(size_t size) { printf("size = %zu \n", size); xor8_t filter; - xor8_allocate(size, &filter); + xor8_allocate((uint32_t)size, &filter); // we need some set of values uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); for (size_t i = 0; i < size; i++) { big_set[i] = i; // we use contiguous values } // we construct the filter - bool constructed = xor8_buffered_populate(big_set, size, &filter); // warm the cache + bool constructed = xor8_buffered_populate(big_set, (uint32_t)size, &filter); // warm the cache if(!constructed) { return false; } for (size_t times = 0; times < 5; times++) { clock_t t; t = clock(); - xor8_buffered_populate(big_set, size, &filter); + xor8_buffered_populate(big_set, (uint32_t)size, &filter); t = clock() - t; double time_taken = ((double)t) / CLOCKS_PER_SEC; // in seconds printf("It took %f seconds to build an index over %zu values. 
\n", @@ -65,19 +65,19 @@ bool testxor16(size_t size) { printf("size = %zu \n", size); xor16_t filter; - xor16_allocate(size, &filter); + xor16_allocate((uint32_t)size, &filter); // we need some set of values uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); for (size_t i = 0; i < size; i++) { big_set[i] = i; // we use contiguous values } // we construct the filter - bool constructed = xor16_populate(big_set, size, &filter); // warm the cache + bool constructed = xor16_populate(big_set, (uint32_t)size, &filter); // warm the cache if(!constructed) { return false; } for (size_t times = 0; times < 5; times++) { clock_t t; t = clock(); - xor16_populate(big_set, size, &filter); + xor16_populate(big_set, (uint32_t)size, &filter); t = clock() - t; double time_taken = ((double)t) / CLOCKS_PER_SEC; // in seconds printf("It took %f seconds to build an index over %zu values. \n", @@ -93,19 +93,19 @@ bool testbufferedxor16(size_t size) { printf("size = %zu \n", size); xor16_t filter; - xor16_allocate(size, &filter); + xor16_allocate((uint32_t)size, &filter); // we need some set of values uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); for (size_t i = 0; i < size; i++) { big_set[i] = i; // we use contiguous values } // we construct the filter - bool constructed = xor16_buffered_populate(big_set, size, &filter); // warm the cache + bool constructed = xor16_buffered_populate(big_set, (uint32_t)size, &filter); // warm the cache if(!constructed) { return false; } for (size_t times = 0; times < 5; times++) { clock_t t; t = clock(); - xor16_buffered_populate(big_set, size, &filter); + xor16_buffered_populate(big_set, (uint32_t)size, &filter); t = clock() - t; double time_taken = ((double)t) / CLOCKS_PER_SEC; // in seconds printf("It took %f seconds to build an index over %zu values. 
\n", @@ -122,19 +122,19 @@ bool testbinaryfuse8(size_t size) { binary_fuse8_t filter; - binary_fuse8_allocate(size, &filter); + binary_fuse8_allocate((uint32_t)size, &filter); // we need some set of values uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); for (size_t i = 0; i < size; i++) { big_set[i] = i; // we use contiguous values } // we construct the filter - bool constructed = binary_fuse8_populate(big_set, size, &filter); // warm the cache + bool constructed = binary_fuse8_populate(big_set, (uint32_t)size, &filter); // warm the cache if(!constructed) { return false; } for (size_t times = 0; times < 5; times++) { clock_t t; t = clock(); - binary_fuse8_populate(big_set, size, &filter); + binary_fuse8_populate(big_set, (uint32_t)size, &filter); t = clock() - t; double time_taken = ((double)t) / CLOCKS_PER_SEC; // in seconds printf("It took %f seconds to build an index over %zu values. \n", @@ -151,19 +151,19 @@ bool testbinaryfuse16(size_t size) { binary_fuse16_t filter; - binary_fuse16_allocate(size, &filter); + binary_fuse16_allocate((uint32_t)size, &filter); // we need some set of values uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); for (size_t i = 0; i < size; i++) { big_set[i] = i; // we use contiguous values } // we construct the filter - bool constructed = binary_fuse16_populate(big_set, size, &filter); // warm the cache + bool constructed = binary_fuse16_populate(big_set, (uint32_t)size, &filter); // warm the cache if(!constructed) { return false; } for (size_t times = 0; times < 5; times++) { clock_t t; t = clock(); - binary_fuse16_populate(big_set, size, &filter); + binary_fuse16_populate(big_set, (uint32_t)size, &filter); t = clock() - t; double time_taken = ((double)t) / CLOCKS_PER_SEC; // in seconds printf("It took %f seconds to build an index over %zu values. 
\n", diff --git a/include/binaryfusefilter.h b/include/binaryfusefilter.h index c9ea43a..7049f3f 100644 --- a/include/binaryfusefilter.h +++ b/include/binaryfusefilter.h @@ -8,13 +8,12 @@ #include #include #ifndef XOR_MAX_ITERATIONS -#define XOR_MAX_ITERATIONS \ - 100 // probability of success should always be > 0.5 so 100 iterations is - // highly unlikely +// probability of success should always be > 0.5 so 100 iterations is highly unlikely +#define XOR_MAX_ITERATIONS 100 #endif static int binary_fuse_cmpfunc(const void * a, const void * b) { - return ( *(const uint64_t*)a - *(const uint64_t*)b ); + return (int)( *(const uint64_t*)a - *(const uint64_t*)b ); } static size_t binary_fuse_sort_and_remove_dup(uint64_t* keys, size_t length) { @@ -33,25 +32,25 @@ static size_t binary_fuse_sort_and_remove_dup(uint64_t* keys, size_t length) { * We start with a few utilities. ***/ static inline uint64_t binary_fuse_murmur64(uint64_t h) { - h ^= h >> 33; + h ^= h >> 33U; h *= UINT64_C(0xff51afd7ed558ccd); - h ^= h >> 33; + h ^= h >> 33U; h *= UINT64_C(0xc4ceb9fe1a85ec53); - h ^= h >> 33; + h ^= h >> 33U; return h; } static inline uint64_t binary_fuse_mix_split(uint64_t key, uint64_t seed) { return binary_fuse_murmur64(key + seed); } static inline uint64_t binary_fuse_rotl64(uint64_t n, unsigned int c) { - return (n << (c & 63)) | (n >> ((-c) & 63)); + return (n << (c & 63U)) | (n >> ((-c) & 63U)); } static inline uint32_t binary_fuse_reduce(uint32_t hash, uint32_t n) { // http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ - return (uint32_t)(((uint64_t)hash * n) >> 32); + return (uint32_t)(((uint64_t)hash * n) >> 32U); } -static inline uint64_t binary_fuse8_fingerprint(uint64_t hash) { - return hash ^ (hash >> 32); +static inline uint8_t binary_fuse8_fingerprint(uint64_t hash) { + return (uint8_t)(hash ^ (hash >> 32U)); } /** @@ -61,9 +60,9 @@ static inline uint64_t binary_fuse8_fingerprint(uint64_t hash) { // returns random number, modifies the 
seed static inline uint64_t binary_fuse_rng_splitmix64(uint64_t *seed) { uint64_t z = (*seed += UINT64_C(0x9E3779B97F4A7C15)); - z = (z ^ (z >> 30)) * UINT64_C(0xBF58476D1CE4E5B9); - z = (z ^ (z >> 27)) * UINT64_C(0x94D049BB133111EB); - return z ^ (z >> 31); + z = (z ^ (z >> 30U)) * UINT64_C(0xBF58476D1CE4E5B9); + z = (z ^ (z >> 27U)) * UINT64_C(0x94D049BB133111EB); + return z ^ (z >> 31U); } typedef struct binary_fuse8_s { @@ -80,7 +79,7 @@ typedef struct binary_fuse8_s { // https://stackoverflow.com/a/50958815 #ifdef __SIZEOF_INT128__ // compilers supporting __uint128, e.g., gcc, clang static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) { - return ((__uint128_t)a * b) >> 64; + return (uint64_t)(((__uint128_t)a * b) >> 64U); } #elif defined(_M_X64) || defined(_MARM64) // MSVC static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) { @@ -158,20 +157,20 @@ static inline binary_hashes_t binary_fuse8_hash_batch(uint64_t hash, ans.h0 = (uint32_t)hi; ans.h1 = ans.h0 + filter->SegmentLength; ans.h2 = ans.h1 + filter->SegmentLength; - ans.h1 ^= (uint32_t)(hash >> 18) & filter->SegmentLengthMask; + ans.h1 ^= (uint32_t)(hash >> 18U) & filter->SegmentLengthMask; ans.h2 ^= (uint32_t)(hash)&filter->SegmentLengthMask; return ans; } -static inline uint32_t binary_fuse8_hash(int index, uint64_t hash, +static inline uint32_t binary_fuse8_hash(uint64_t index, uint64_t hash, const binary_fuse8_t *filter) { uint64_t h = binary_fuse_mulhi(hash, filter->SegmentCountLength); h += index * filter->SegmentLength; // keep the lower 36 bits - uint64_t hh = hash & ((1ULL << 36) - 1); + uint64_t hh = hash & ((1ULL << 36U) - 1); // index 0: right shift by 36; index 1: right shift by 18; index 2: no shift h ^= (size_t)((hh >> (36 - 18 * index)) & filter->SegmentLengthMask); - return h; + return (uint32_t)h; } // Report if the key is in the set, with false positive rate. 
@@ -180,7 +179,8 @@ static inline bool binary_fuse8_contain(uint64_t key, uint64_t hash = binary_fuse_mix_split(key, filter->Seed); uint8_t f = binary_fuse8_fingerprint(hash); binary_hashes_t hashes = binary_fuse8_hash_batch(hash, filter); - f ^= filter->Fingerprints[hashes.h0] ^ filter->Fingerprints[hashes.h1] ^ + f ^= (uint32_t)filter->Fingerprints[hashes.h0] ^ + filter->Fingerprints[hashes.h1] ^ filter->Fingerprints[hashes.h2]; return f == 0; } @@ -190,12 +190,12 @@ static inline uint32_t binary_fuse_calculate_segment_length(uint32_t arity, // These parameters are very sensitive. Replacing 'floor' by 'round' can // substantially affect the construction time. if (arity == 3) { - return ((uint32_t)1) << (int)(floor(log((double)(size)) / log(3.33) + 2.25)); - } else if (arity == 4) { - return ((uint32_t)1) << (int)(floor(log((double)(size)) / log(2.91) - 0.5)); - } else { - return 65536; + return ((uint32_t)1) << (unsigned)(floor(log((double)(size)) / log(3.33) + 2.25)); + } + if (arity == 4) { + return ((uint32_t)1) << (unsigned)(floor(log((double)(size)) / log(2.91) - 0.5)); } + return 65536; } static inline double binary_fuse_max(double a, double b) { @@ -209,11 +209,11 @@ static inline double binary_fuse_calculate_size_factor(uint32_t arity, uint32_t size) { if (arity == 3) { return binary_fuse_max(1.125, 0.875 + 0.25 * log(1000000.0) / log((double)size)); - } else if (arity == 4) { + } + if (arity == 4) { return binary_fuse_max(1.075, 0.77 + 0.305 * log(600000.0) / log((double)size)); - } else { - return 2.0; } + return 2.0; } // allocate enough capacity for a set containing up to 'size' elements @@ -290,7 +290,7 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, blockBits += 1; } uint32_t block = ((uint32_t)1 << blockBits); - uint32_t *startPos = (uint32_t *)malloc((1 << blockBits) * sizeof(uint32_t)); + uint32_t *startPos = (uint32_t *)malloc((1U << blockBits) * sizeof(uint32_t)); uint32_t h012[5]; if ((alone == NULL) || (t2count == 
NULL) || (reverseH == NULL) || @@ -308,7 +308,7 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, if (loop + 1 > XOR_MAX_ITERATIONS) { // The probability of this happening is lower than the // the cosmic-ray probability (i.e., a cosmic ray corrupts your system) - memset(filter->Fingerprints, ~0, filter->ArrayLength); + memset(filter->Fingerprints, 0xFF, filter->ArrayLength); free(alone); free(t2count); free(reverseH); @@ -321,7 +321,7 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, for (uint32_t i = 0; i < block; i++) { // important : i * size would overflow as a 32-bit number in some // cases. - startPos[i] = ((uint64_t)i * size) >> blockBits; + startPos[i] = (uint32_t)((uint64_t)i * size) >> blockBits; } uint64_t maskblock = block - 1; @@ -344,12 +344,12 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, t2hash[h0] ^= hash; uint32_t h1= binary_fuse8_hash(1, hash, filter); t2count[h1] += 4; - t2count[h1] ^= 1; + t2count[h1] ^= 1U; t2hash[h1] ^= hash; uint32_t h2 = binary_fuse8_hash(2, hash, filter); t2count[h2] += 4; t2hash[h2] ^= hash; - t2count[h2] ^= 2; + t2count[h2] ^= 2U; if ((t2hash[h0] & t2hash[h1] & t2hash[h2]) == 0) { if (((t2hash[h0] == 0) && (t2count[h0] == 8)) || ((t2hash[h1] == 0) && (t2count[h1] == 8)) @@ -358,10 +358,10 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, t2count[h0] -= 4; t2hash[h0] ^= hash; t2count[h1] -= 4; - t2count[h1] ^= 1; + t2count[h1] ^= 1U; t2hash[h1] ^= hash; t2count[h2] -= 4; - t2count[h2] ^= 2; + t2count[h2] ^= 2U; t2hash[h2] ^= hash; } } @@ -382,13 +382,13 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, // Add sets with one key to the queue. for (uint32_t i = 0; i < capacity; i++) { alone[Qsize] = i; - Qsize += ((t2count[i] >> 2) == 1) ? 1 : 0; + Qsize += ((t2count[i] >> 2U) == 1) ? 
1U : 0U; } uint32_t stacksize = 0; while (Qsize > 0) { Qsize--; uint32_t index = alone[Qsize]; - if ((t2count[index] >> 2) == 1) { + if ((t2count[index] >> 2U) == 1) { uint64_t hash = t2hash[index]; //h012[0] = binary_fuse8_hash(0, hash, filter); @@ -396,13 +396,13 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, h012[2] = binary_fuse8_hash(2, hash, filter); h012[3] = binary_fuse8_hash(0, hash, filter); // == h012[0]; h012[4] = h012[1]; - uint8_t found = t2count[index] & 3; + uint8_t found = t2count[index] & 3U; reverseH[stacksize] = found; reverseOrder[stacksize] = hash; stacksize++; uint32_t other_index1 = h012[found + 1]; alone[Qsize] = other_index1; - Qsize += ((t2count[other_index1] >> 2) == 2 ? 1 : 0); + Qsize += ((t2count[other_index1] >> 2U) == 2 ? 1U : 0U); t2count[other_index1] -= 4; t2count[other_index1] ^= binary_fuse_mod3(found + 1); @@ -410,7 +410,7 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, uint32_t other_index2 = h012[found + 2]; alone[Qsize] = other_index2; - Qsize += ((t2count[other_index2] >> 2) == 2 ? 1 : 0); + Qsize += ((t2count[other_index2] >> 2U) == 2 ? 
1U : 0U); t2count[other_index2] -= 4; t2count[other_index2] ^= binary_fuse_mod3(found + 2); t2hash[other_index2] ^= hash; @@ -420,8 +420,9 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, // success size = stacksize; break; - } else if(duplicates > 0) { - size = binary_fuse_sort_and_remove_dup(keys, size); + } + if(duplicates > 0) { + size = (uint32_t)binary_fuse_sort_and_remove_dup(keys, size); } memset(reverseOrder, 0, sizeof(uint64_t) * size); memset(t2count, 0, sizeof(uint8_t) * capacity); @@ -439,9 +440,9 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, h012[2] = binary_fuse8_hash(2, hash, filter); h012[3] = h012[0]; h012[4] = h012[1]; - filter->Fingerprints[h012[found]] = xor2 ^ - filter->Fingerprints[h012[found + 1]] ^ - filter->Fingerprints[h012[found + 2]]; + filter->Fingerprints[h012[found]] = (uint8_t)((uint32_t)xor2 ^ + filter->Fingerprints[h012[found + 1]] ^ + filter->Fingerprints[h012[found + 2]]); } free(alone); free(t2count); @@ -466,8 +467,8 @@ typedef struct binary_fuse16_s { uint16_t *Fingerprints; } binary_fuse16_t; -static inline uint64_t binary_fuse16_fingerprint(uint64_t hash) { - return hash ^ (hash >> 32); +static inline uint16_t binary_fuse16_fingerprint(uint64_t hash) { + return (uint16_t)(hash ^ (hash >> 32U)); } static inline binary_hashes_t binary_fuse16_hash_batch(uint64_t hash, @@ -477,19 +478,19 @@ static inline binary_hashes_t binary_fuse16_hash_batch(uint64_t hash, ans.h0 = (uint32_t)hi; ans.h1 = ans.h0 + filter->SegmentLength; ans.h2 = ans.h1 + filter->SegmentLength; - ans.h1 ^= (uint32_t)(hash >> 18) & filter->SegmentLengthMask; + ans.h1 ^= (uint32_t)(hash >> 18U) & filter->SegmentLengthMask; ans.h2 ^= (uint32_t)(hash)&filter->SegmentLengthMask; return ans; } -static inline uint32_t binary_fuse16_hash(int index, uint64_t hash, +static inline uint32_t binary_fuse16_hash(uint64_t index, uint64_t hash, const binary_fuse16_t *filter) { uint64_t h = binary_fuse_mulhi(hash, 
filter->SegmentCountLength); h += index * filter->SegmentLength; // keep the lower 36 bits - uint64_t hh = hash & ((1ULL << 36) - 1); + uint64_t hh = hash & ((1ULL << 36U) - 1); // index 0: right shift by 36; index 1: right shift by 18; index 2: no shift h ^= (size_t)((hh >> (36 - 18 * index)) & filter->SegmentLengthMask); - return h; + return (uint32_t)h; } // Report if the key is in the set, with false positive rate. @@ -498,7 +499,8 @@ static inline bool binary_fuse16_contain(uint64_t key, uint64_t hash = binary_fuse_mix_split(key, filter->Seed); uint16_t f = binary_fuse16_fingerprint(hash); binary_hashes_t hashes = binary_fuse16_hash_batch(hash, filter); - f ^= filter->Fingerprints[hashes.h0] ^ filter->Fingerprints[hashes.h1] ^ + f ^= (uint32_t)filter->Fingerprints[hashes.h0] ^ + filter->Fingerprints[hashes.h1] ^ filter->Fingerprints[hashes.h2]; return f == 0; } @@ -575,7 +577,7 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, blockBits += 1; } uint32_t block = ((uint32_t)1 << blockBits); - uint32_t *startPos = (uint32_t *)malloc((1 << blockBits) * sizeof(uint32_t)); + uint32_t *startPos = (uint32_t *)malloc((1U << blockBits) * sizeof(uint32_t)); uint32_t h012[5]; if ((alone == NULL) || (t2count == NULL) || (reverseH == NULL) || @@ -605,7 +607,7 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, for (uint32_t i = 0; i < block; i++) { // important : i * size would overflow as a 32-bit number in some // cases. 
- startPos[i] = ((uint64_t)i * size) >> blockBits; + startPos[i] = (uint32_t)(((uint64_t)i * size) >> blockBits); } uint64_t maskblock = block - 1; @@ -628,12 +630,12 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, t2hash[h0] ^= hash; uint32_t h1= binary_fuse16_hash(1, hash, filter); t2count[h1] += 4; - t2count[h1] ^= 1; + t2count[h1] ^= 1U; t2hash[h1] ^= hash; uint32_t h2 = binary_fuse16_hash(2, hash, filter); t2count[h2] += 4; t2hash[h2] ^= hash; - t2count[h2] ^= 2; + t2count[h2] ^= 2U; if ((t2hash[h0] & t2hash[h1] & t2hash[h2]) == 0) { if (((t2hash[h0] == 0) && (t2count[h0] == 8)) || ((t2hash[h1] == 0) && (t2count[h1] == 8)) @@ -642,10 +644,10 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, t2count[h0] -= 4; t2hash[h0] ^= hash; t2count[h1] -= 4; - t2count[h1] ^= 1; + t2count[h1] ^= 1U; t2hash[h1] ^= hash; t2count[h2] -= 4; - t2count[h2] ^= 2; + t2count[h2] ^= 2U; t2hash[h2] ^= hash; } } @@ -666,13 +668,13 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, // Add sets with one key to the queue. for (uint32_t i = 0; i < capacity; i++) { alone[Qsize] = i; - Qsize += ((t2count[i] >> 2) == 1) ? 1 : 0; + Qsize += ((t2count[i] >> 2U) == 1) ? 1U : 0U; } uint32_t stacksize = 0; while (Qsize > 0) { Qsize--; uint32_t index = alone[Qsize]; - if ((t2count[index] >> 2) == 1) { + if ((t2count[index] >> 2U) == 1) { uint64_t hash = t2hash[index]; //h012[0] = binary_fuse16_hash(0, hash, filter); @@ -680,13 +682,13 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, h012[2] = binary_fuse16_hash(2, hash, filter); h012[3] = binary_fuse16_hash(0, hash, filter); // == h012[0]; h012[4] = h012[1]; - uint8_t found = t2count[index] & 3; + uint8_t found = t2count[index] & 3U; reverseH[stacksize] = found; reverseOrder[stacksize] = hash; stacksize++; uint32_t other_index1 = h012[found + 1]; alone[Qsize] = other_index1; - Qsize += ((t2count[other_index1] >> 2) == 2 ? 
1 : 0); + Qsize += ((t2count[other_index1] >> 2U) == 2 ? 1U : 0U); t2count[other_index1] -= 4; t2count[other_index1] ^= binary_fuse_mod3(found + 1); @@ -694,7 +696,7 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, uint32_t other_index2 = h012[found + 2]; alone[Qsize] = other_index2; - Qsize += ((t2count[other_index2] >> 2) == 2 ? 1 : 0); + Qsize += ((t2count[other_index2] >> 2U) == 2 ? 1U : 0U); t2count[other_index2] -= 4; t2count[other_index2] ^= binary_fuse_mod3(found + 2); t2hash[other_index2] ^= hash; @@ -704,8 +706,9 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, // success size = stacksize; break; - } else if(duplicates > 0) { - size = binary_fuse_sort_and_remove_dup(keys, size); + } + if(duplicates > 0) { + size = (uint32_t)binary_fuse_sort_and_remove_dup(keys, size); } memset(reverseOrder, 0, sizeof(uint64_t) * size); memset(t2count, 0, sizeof(uint8_t) * capacity); @@ -723,9 +726,10 @@ static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size, h012[2] = binary_fuse16_hash(2, hash, filter); h012[3] = h012[0]; h012[4] = h012[1]; - filter->Fingerprints[h012[found]] = xor2 ^ - filter->Fingerprints[h012[found + 1]] ^ - filter->Fingerprints[h012[found + 2]]; + filter->Fingerprints[h012[found]] = (uint16_t)( + (uint32_t)xor2 ^ + (uint32_t)filter->Fingerprints[h012[found + 1]] ^ + (uint32_t)filter->Fingerprints[h012[found + 2]]); } free(alone); free(t2count); diff --git a/include/xorfilter.h b/include/xorfilter.h index c86fa80..49de6d9 100644 --- a/include/xorfilter.h +++ b/include/xorfilter.h @@ -12,12 +12,13 @@ #endif #ifndef XOR_MAX_ITERATIONS -#define XOR_MAX_ITERATIONS 100 // probabillity of success should always be > 0.5 so 100 iterations is highly unlikely +// probabillity of success should always be > 0.5 so 100 iterations is highly unlikely +#define XOR_MAX_ITERATIONS 100 #endif static int xor_cmpfunc(const void * a, const void * b) { - return ( *(const uint64_t*)a - *(const uint64_t*)b 
); + return (int)( *(const uint64_t*)a - *(const uint64_t*)b ); } static size_t xor_sort_and_remove_dup(uint64_t* keys, size_t length) { @@ -43,11 +44,11 @@ static size_t xor_sort_and_remove_dup(uint64_t* keys, size_t length) { * We start with a few utilities. ***/ static inline uint64_t xor_murmur64(uint64_t h) { - h ^= h >> 33; + h ^= h >> 33U; h *= UINT64_C(0xff51afd7ed558ccd); - h ^= h >> 33; + h ^= h >> 33U; h *= UINT64_C(0xc4ceb9fe1a85ec53); - h ^= h >> 33; + h ^= h >> 33U; return h; } @@ -56,16 +57,16 @@ static inline uint64_t xor_mix_split(uint64_t key, uint64_t seed) { } static inline uint64_t xor_rotl64(uint64_t n, unsigned int c) { - return (n << (c & 63)) | (n >> ((-c) & 63)); + return (n << (c & 63U)) | (n >> ((-c) & 63U)); } static inline uint32_t xor_reduce(uint32_t hash, uint32_t n) { // http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ - return (uint32_t)(((uint64_t)hash * n) >> 32); + return (uint32_t)(((uint64_t)hash * n) >> 32U); } static inline uint64_t xor_fingerprint(uint64_t hash) { - return hash ^ (hash >> 32); + return hash ^ (hash >> 32U); } /** @@ -75,9 +76,9 @@ static inline uint64_t xor_fingerprint(uint64_t hash) { // returns random number, modifies the seed static inline uint64_t xor_rng_splitmix64(uint64_t *seed) { uint64_t z = (*seed += UINT64_C(0x9E3779B97F4A7C15)); - z = (z ^ (z >> 30)) * UINT64_C(0xBF58476D1CE4E5B9); - z = (z ^ (z >> 27)) * UINT64_C(0x94D049BB133111EB); - return z ^ (z >> 31); + z = (z ^ (z >> 30U)) * UINT64_C(0xBF58476D1CE4E5B9); + z = (z ^ (z >> 27U)) * UINT64_C(0x94D049BB133111EB); + return z ^ (z >> 31U); } /** @@ -94,15 +95,16 @@ typedef struct xor8_s { // Report if the key is in the set, with false positive rate. 
static inline bool xor8_contain(uint64_t key, const xor8_t *filter) { uint64_t hash = xor_mix_split(key, filter->seed); - uint8_t f = xor_fingerprint(hash); + uint8_t f = (uint8_t)xor_fingerprint(hash); uint32_t r0 = (uint32_t)hash; uint32_t r1 = (uint32_t)xor_rotl64(hash, 21); uint32_t r2 = (uint32_t)xor_rotl64(hash, 42); - uint32_t h0 = xor_reduce(r0, filter->blockLength); - uint32_t h1 = xor_reduce(r1, filter->blockLength) + filter->blockLength; - uint32_t h2 = xor_reduce(r2, filter->blockLength) + 2 * filter->blockLength; - return f == (filter->fingerprints[h0] ^ filter->fingerprints[h1] ^ - filter->fingerprints[h2]); + uint32_t h0 = xor_reduce(r0, (uint32_t)filter->blockLength); + uint32_t h1 = xor_reduce(r1, (uint32_t)filter->blockLength) + (uint32_t)filter->blockLength; + uint32_t h2 = xor_reduce(r2, (uint32_t)filter->blockLength) + 2 * (uint32_t)filter->blockLength; + return f == ((uint32_t)filter->fingerprints[h0] ^ + filter->fingerprints[h1] ^ + filter->fingerprints[h2]); } typedef struct xor16_s { @@ -115,43 +117,42 @@ typedef struct xor16_s { // Report if the key is in the set, with false positive rate. 
static inline bool xor16_contain(uint64_t key, const xor16_t *filter) { uint64_t hash = xor_mix_split(key, filter->seed); - uint16_t f = xor_fingerprint(hash); + uint16_t f = (uint16_t)xor_fingerprint(hash); uint32_t r0 = (uint32_t)hash; uint32_t r1 = (uint32_t)xor_rotl64(hash, 21); uint32_t r2 = (uint32_t)xor_rotl64(hash, 42); - uint32_t h0 = xor_reduce(r0, filter->blockLength); - uint32_t h1 = xor_reduce(r1, filter->blockLength) + filter->blockLength; - uint32_t h2 = xor_reduce(r2, filter->blockLength) + 2 * filter->blockLength; - return f == (filter->fingerprints[h0] ^ filter->fingerprints[h1] ^ - filter->fingerprints[h2]); + uint32_t h0 = xor_reduce(r0, (uint32_t)filter->blockLength); + uint32_t h1 = xor_reduce(r1, (uint32_t)filter->blockLength) + (uint32_t)filter->blockLength; + uint32_t h2 = xor_reduce(r2, (uint32_t)filter->blockLength) + 2 * (uint32_t)filter->blockLength; + return f == ((uint32_t)filter->fingerprints[h0] ^ + filter->fingerprints[h1] ^ + filter->fingerprints[h2]); } // allocate enough capacity for a set containing up to 'size' elements // caller is responsible to call xor8_free(filter) static inline bool xor8_allocate(uint32_t size, xor8_t *filter) { - size_t capacity = 32 + 1.23 * size; + size_t capacity = (size_t)(32 + 1.23 * size); capacity = capacity / 3 * 3; filter->fingerprints = (uint8_t *)malloc(capacity * sizeof(uint8_t)); if (filter->fingerprints != NULL) { filter->blockLength = capacity / 3; return true; - } else { - return false; } + return false; } // allocate enough capacity for a set containing up to 'size' elements // caller is responsible to call xor16_free(filter) static inline bool xor16_allocate(uint32_t size, xor16_t *filter) { - size_t capacity = 32 + 1.23 * size; + size_t capacity = (size_t)(32 + 1.23 * size); capacity = capacity / 3 * 3; filter->fingerprints = (uint16_t *)malloc(capacity * sizeof(uint16_t)); if (filter->fingerprints != NULL) { filter->blockLength = capacity / 3; return true; - } else { - return false; 
- } + } + return false; } // report memory usage @@ -202,9 +203,9 @@ static inline xor_hashes_t xor8_get_h0_h1_h2(uint64_t k, const xor8_t *filter) { uint32_t r1 = (uint32_t)xor_rotl64(hash, 21); uint32_t r2 = (uint32_t)xor_rotl64(hash, 42); - answer.h0 = xor_reduce(r0, filter->blockLength); - answer.h1 = xor_reduce(r1, filter->blockLength); - answer.h2 = xor_reduce(r2, filter->blockLength); + answer.h0 = xor_reduce(r0, (uint32_t)filter->blockLength); + answer.h1 = xor_reduce(r1, (uint32_t)filter->blockLength); + answer.h2 = xor_reduce(r2, (uint32_t)filter->blockLength); return answer; } @@ -218,27 +219,27 @@ typedef struct xor_h0h1h2_s xor_h0h1h2_t; static inline uint32_t xor8_get_h0(uint64_t hash, const xor8_t *filter) { uint32_t r0 = (uint32_t)hash; - return xor_reduce(r0, filter->blockLength); + return xor_reduce(r0, (uint32_t)filter->blockLength); } static inline uint32_t xor8_get_h1(uint64_t hash, const xor8_t *filter) { uint32_t r1 = (uint32_t)xor_rotl64(hash, 21); - return xor_reduce(r1, filter->blockLength); + return xor_reduce(r1, (uint32_t)filter->blockLength); } static inline uint32_t xor8_get_h2(uint64_t hash, const xor8_t *filter) { uint32_t r2 = (uint32_t)xor_rotl64(hash, 42); - return xor_reduce(r2, filter->blockLength); + return xor_reduce(r2, (uint32_t)filter->blockLength); } static inline uint32_t xor16_get_h0(uint64_t hash, const xor16_t *filter) { uint32_t r0 = (uint32_t)hash; - return xor_reduce(r0, filter->blockLength); + return xor_reduce(r0, (uint32_t)filter->blockLength); } static inline uint32_t xor16_get_h1(uint64_t hash, const xor16_t *filter) { uint32_t r1 = (uint32_t)xor_rotl64(hash, 21); - return xor_reduce(r1, filter->blockLength); + return xor_reduce(r1, (uint32_t)filter->blockLength); } static inline uint32_t xor16_get_h2(uint64_t hash, const xor16_t *filter) { uint32_t r2 = (uint32_t)xor_rotl64(hash, 42); - return xor_reduce(r2, filter->blockLength); + return xor_reduce(r2, (uint32_t)filter->blockLength); } static inline 
xor_hashes_t xor16_get_h0_h1_h2(uint64_t k, const xor16_t *filter) { @@ -249,9 +250,9 @@ static inline xor_hashes_t xor16_get_h0_h1_h2(uint64_t k, uint32_t r1 = (uint32_t)xor_rotl64(hash, 21); uint32_t r2 = (uint32_t)xor_rotl64(hash, 42); - answer.h0 = xor_reduce(r0, filter->blockLength); - answer.h1 = xor_reduce(r1, filter->blockLength); - answer.h2 = xor_reduce(r2, filter->blockLength); + answer.h0 = xor_reduce(r0, (uint32_t)filter->blockLength); + answer.h1 = xor_reduce(r1, (uint32_t)filter->blockLength); + answer.h2 = xor_reduce(r2, (uint32_t)filter->blockLength); return answer; } @@ -265,7 +266,7 @@ typedef struct xor_keyindex_s xor_keyindex_t; struct xor_setbuffer_s { xor_keyindex_t *buffer; uint32_t *counts; - int insignificantbits; + int insignificantbits; // should be an unsigned type to avoid a lot of casts uint32_t slotsize; // should be 1<< insignificantbits uint32_t slotcount; size_t originalsize; @@ -276,10 +277,10 @@ typedef struct xor_setbuffer_s xor_setbuffer_t; static inline bool xor_init_buffer(xor_setbuffer_t *buffer, size_t size) { buffer->originalsize = size; buffer->insignificantbits = 18; - buffer->slotsize = UINT32_C(1) << buffer->insignificantbits; - buffer->slotcount = (size + buffer->slotsize - 1) / buffer->slotsize; + buffer->slotsize = UINT32_C(1) << (uint32_t)buffer->insignificantbits; + buffer->slotcount = (uint32_t)(size + buffer->slotsize - 1) / buffer->slotsize; buffer->buffer = (xor_keyindex_t *)malloc( - buffer->slotcount * buffer->slotsize * sizeof(xor_keyindex_t)); + (size_t)buffer->slotcount * buffer->slotsize * sizeof(xor_keyindex_t)); buffer->counts = (uint32_t *)malloc(buffer->slotcount * sizeof(uint32_t)); if ((buffer->counts == NULL) || (buffer->buffer == NULL)) { free(buffer->counts); @@ -300,12 +301,12 @@ static inline void xor_free_buffer(xor_setbuffer_t *buffer) { static inline void xor_buffered_increment_counter(uint32_t index, uint64_t hash, xor_setbuffer_t *buffer, xor_xorset_t *sets) { - uint32_t slot = index >> 
buffer->insignificantbits; - size_t addr = buffer->counts[slot] + (slot << buffer->insignificantbits); + uint32_t slot = index >> (uint32_t)buffer->insignificantbits; + size_t addr = buffer->counts[slot] + (slot << (uint32_t)buffer->insignificantbits); buffer->buffer[addr].index = index; buffer->buffer[addr].hash = hash; buffer->counts[slot]++; - size_t offset = (slot << buffer->insignificantbits); + size_t offset = (slot << (uint32_t)buffer->insignificantbits); if (buffer->counts[slot] == buffer->slotsize) { // must empty the buffer for (size_t i = offset; i < buffer->slotsize + offset; i++) { @@ -321,10 +322,10 @@ static inline void xor_buffered_increment_counter(uint32_t index, uint64_t hash, static inline void xor_make_buffer_current(xor_setbuffer_t *buffer, xor_xorset_t *sets, uint32_t index, xor_keyindex_t *Q, size_t *Qsize) { - uint32_t slot = index >> buffer->insignificantbits; + uint32_t slot = index >> (uint32_t)buffer->insignificantbits; if(buffer->counts[slot] > 0) { // uncommon! 
size_t qsize = *Qsize; - size_t offset = (slot << buffer->insignificantbits); + size_t offset = (slot << (uint32_t)buffer->insignificantbits); for (size_t i = offset; i < buffer->counts[slot] + offset; i++) { xor_keyindex_t ki = buffer->buffer[i]; sets[ki.index].xormask ^= ki.hash; @@ -347,14 +348,14 @@ static inline void xor_buffered_decrement_counter(uint32_t index, uint64_t hash, xor_xorset_t *sets, xor_keyindex_t *Q, size_t *Qsize) { - uint32_t slot = index >> buffer->insignificantbits; - size_t addr = buffer->counts[slot] + (slot << buffer->insignificantbits); + uint32_t slot = index >> (uint32_t)buffer->insignificantbits; + size_t addr = buffer->counts[slot] + (slot << (uint32_t)buffer->insignificantbits); buffer->buffer[addr].index = index; buffer->buffer[addr].hash = hash; buffer->counts[slot]++; if (buffer->counts[slot] == buffer->slotsize) { size_t qsize = *Qsize; - size_t offset = (slot << buffer->insignificantbits); + size_t offset = (slot << (uint32_t)buffer->insignificantbits); for (size_t i = offset; i < buffer->counts[slot] + offset; i++) { xor_keyindex_t ki = buffer->buffer[i]; @@ -374,7 +375,7 @@ static inline void xor_buffered_decrement_counter(uint32_t index, uint64_t hash, static inline void xor_flush_increment_buffer(xor_setbuffer_t *buffer, xor_xorset_t *sets) { for (uint32_t slot = 0; slot < buffer->slotcount; slot++) { - size_t offset = (slot << buffer->insignificantbits); + size_t offset = (slot << (uint32_t)buffer->insignificantbits); for (size_t i = offset; i < buffer->counts[slot] + offset; i++) { xor_keyindex_t ki = buffer->buffer[i]; @@ -391,7 +392,7 @@ static inline void xor_flush_decrement_buffer(xor_setbuffer_t *buffer, size_t *Qsize) { size_t qsize = *Qsize; for (uint32_t slot = 0; slot < buffer->slotcount; slot++) { - uint32_t base = (slot << buffer->insignificantbits); + uint32_t base = (slot << (uint32_t)buffer->insignificantbits); for (size_t i = base; i < buffer->counts[slot] + base; i++) { xor_keyindex_t ki = 
buffer->buffer[i]; sets[ki.index].xormask ^= ki.hash; @@ -422,7 +423,7 @@ static inline uint32_t xor_flushone_decrement_buffer(xor_setbuffer_t *buffer, uint32_t slot = bestslot; size_t qsize = *Qsize; // for(uint32_t slot = 0; slot < buffer->slotcount; slot++) { - uint32_t base = (slot << buffer->insignificantbits); + uint32_t base = (slot << (uint32_t)buffer->insignificantbits); for (size_t i = base; i < buffer->counts[slot] + base; i++) { xor_keyindex_t ki = buffer->buffer[i]; sets[ki.index].xormask ^= ki.hash; @@ -491,7 +492,7 @@ static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t while (true) { iterations ++; if(iterations == XOR_SORT_ITERATIONS) { - size = xor_sort_and_remove_dup(keys, size); + size = (uint32_t)xor_sort_and_remove_dup(keys, size); } if(iterations > XOR_MAX_ITERATIONS) { // The probability of this happening is lower than the @@ -522,7 +523,7 @@ static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t size_t Q0size = 0, Q1size = 0, Q2size = 0; for (size_t i = 0; i < filter->blockLength; i++) { if (sets0[i].count == 1) { - Q0[Q0size].index = i; + Q0[Q0size].index = (uint32_t)i; Q0[Q0size].hash = sets0[i].xormask; Q0size++; } @@ -530,14 +531,14 @@ static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t for (size_t i = 0; i < filter->blockLength; i++) { if (sets1[i].count == 1) { - Q1[Q1size].index = i; + Q1[Q1size].index = (uint32_t)i; Q1[Q1size].hash = sets1[i].xormask; Q1size++; } } for (size_t i = 0; i < filter->blockLength; i++) { if (sets2[i].count == 1) { - Q2[Q2size].index = i; + Q2[Q2size].index = (uint32_t)i; Q2[Q2size].hash = sets2[i].xormask; Q2size++; } @@ -548,7 +549,7 @@ static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t while (Q0size > 0) { xor_keyindex_t keyindex = Q0[--Q0size]; size_t index = keyindex.index; - xor_make_buffer_current(&buffer0, sets0, index, Q0, &Q0size); + xor_make_buffer_current(&buffer0, sets0, 
(uint32_t)index, Q0, &Q0size); if (sets0[index].count == 0) continue; // not actually possible after the initial scan. @@ -570,7 +571,7 @@ static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t while (Q1size > 0) { xor_keyindex_t keyindex = Q1[--Q1size]; size_t index = keyindex.index; - xor_make_buffer_current(&buffer1, sets1, index, Q1, &Q1size); + xor_make_buffer_current(&buffer1, sets1, (uint32_t)index, Q1, &Q1size); if (sets1[index].count == 0) continue; @@ -578,7 +579,7 @@ static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t uint64_t hash = keyindex.hash; uint32_t h0 = xor8_get_h0(hash, filter); uint32_t h2 = xor8_get_h2(hash, filter); - keyindex.index += blockLength; + keyindex.index += (uint32_t)blockLength; stack[stack_size] = keyindex; stack_size++; xor_buffered_decrement_counter(h0, hash, &buffer0, sets0, Q0, &Q0size); @@ -590,7 +591,7 @@ static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t while (Q2size > 0) { xor_keyindex_t keyindex = Q2[--Q2size]; size_t index = keyindex.index; - xor_make_buffer_current(&buffer2, sets2, index, Q2, &Q2size); + xor_make_buffer_current(&buffer2, sets2, (uint32_t)index, Q2, &Q2size); if (sets2[index].count == 0) continue; @@ -599,7 +600,7 @@ static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t uint32_t h0 = xor8_get_h0(hash, filter); uint32_t h1 = xor8_get_h1(hash, filter); - keyindex.index += 2 * blockLength; + keyindex.index += 2 * (uint32_t)blockLength; stack[stack_size] = keyindex; stack_size++; @@ -632,13 +633,13 @@ static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t xor_keyindex_t ki = stack[--stack_size]; uint64_t val = xor_fingerprint(ki.hash); if(ki.index < blockLength) { - val ^= fingerprints1[xor8_get_h1(ki.hash,filter)] ^ fingerprints2[xor8_get_h2(ki.hash,filter)]; + val ^= (uint32_t)fingerprints1[xor8_get_h1(ki.hash,filter)] ^ fingerprints2[xor8_get_h2(ki.hash,filter)]; } else 
if(ki.index < 2 * blockLength) { - val ^= fingerprints0[xor8_get_h0(ki.hash,filter)] ^ fingerprints2[xor8_get_h2(ki.hash,filter)]; + val ^= (uint32_t)fingerprints0[xor8_get_h0(ki.hash,filter)] ^ fingerprints2[xor8_get_h2(ki.hash,filter)]; } else { - val ^= fingerprints0[xor8_get_h0(ki.hash,filter)] ^ fingerprints1[xor8_get_h1(ki.hash,filter)]; + val ^= (uint32_t)fingerprints0[xor8_get_h0(ki.hash,filter)] ^ fingerprints1[xor8_get_h1(ki.hash,filter)]; } - filter->fingerprints[ki.index] = val; + filter->fingerprints[ki.index] = (uint8_t)val; } xor_free_buffer(&buffer0); xor_free_buffer(&buffer1); @@ -689,7 +690,7 @@ static inline bool xor8_populate(uint64_t *keys, uint32_t size, xor8_t *filter) while (true) { iterations ++; if(iterations == XOR_SORT_ITERATIONS) { - size = xor_sort_and_remove_dup(keys, size); + size = (uint32_t)xor_sort_and_remove_dup(keys, size); } if(iterations > XOR_MAX_ITERATIONS) { // The probability of this happening is lower than the @@ -716,7 +717,7 @@ static inline bool xor8_populate(uint64_t *keys, uint32_t size, xor8_t *filter) size_t Q0size = 0, Q1size = 0, Q2size = 0; for (size_t i = 0; i < filter->blockLength; i++) { if (sets0[i].count == 1) { - Q0[Q0size].index = i; + Q0[Q0size].index = (uint32_t)i; Q0[Q0size].hash = sets0[i].xormask; Q0size++; } @@ -724,14 +725,14 @@ static inline bool xor8_populate(uint64_t *keys, uint32_t size, xor8_t *filter) for (size_t i = 0; i < filter->blockLength; i++) { if (sets1[i].count == 1) { - Q1[Q1size].index = i; + Q1[Q1size].index = (uint32_t)i; Q1[Q1size].hash = sets1[i].xormask; Q1size++; } } for (size_t i = 0; i < filter->blockLength; i++) { if (sets2[i].count == 1) { - Q2[Q2size].index = i; + Q2[Q2size].index = (uint32_t)i; Q2[Q2size].hash = sets2[i].xormask; Q2size++; } @@ -775,7 +776,7 @@ static inline bool xor8_populate(uint64_t *keys, uint32_t size, xor8_t *filter) uint64_t hash = keyindex.hash; uint32_t h0 = xor8_get_h0(hash, filter); uint32_t h2 = xor8_get_h2(hash, filter); - keyindex.index += 
blockLength; + keyindex.index += (uint32_t)blockLength; stack[stack_size] = keyindex; stack_size++; sets0[h0].xormask ^= hash; @@ -804,7 +805,7 @@ static inline bool xor8_populate(uint64_t *keys, uint32_t size, xor8_t *filter) uint32_t h0 = xor8_get_h0(hash, filter); uint32_t h1 = xor8_get_h1(hash, filter); - keyindex.index += 2 * blockLength; + keyindex.index += 2 * (uint32_t)blockLength; stack[stack_size] = keyindex; stack_size++; @@ -841,13 +842,13 @@ static inline bool xor8_populate(uint64_t *keys, uint32_t size, xor8_t *filter) xor_keyindex_t ki = stack[--stack_size]; uint64_t val = xor_fingerprint(ki.hash); if(ki.index < blockLength) { - val ^= fingerprints1[xor8_get_h1(ki.hash,filter)] ^ fingerprints2[xor8_get_h2(ki.hash,filter)]; + val ^= (uint32_t)fingerprints1[xor8_get_h1(ki.hash,filter)] ^ fingerprints2[xor8_get_h2(ki.hash,filter)]; } else if(ki.index < 2 * blockLength) { - val ^= fingerprints0[xor8_get_h0(ki.hash,filter)] ^ fingerprints2[xor8_get_h2(ki.hash,filter)]; + val ^= (uint32_t)fingerprints0[xor8_get_h0(ki.hash,filter)] ^ fingerprints2[xor8_get_h2(ki.hash,filter)]; } else { - val ^= fingerprints0[xor8_get_h0(ki.hash,filter)] ^ fingerprints1[xor8_get_h1(ki.hash,filter)]; + val ^= (uint32_t)fingerprints0[xor8_get_h0(ki.hash,filter)] ^ fingerprints1[xor8_get_h1(ki.hash,filter)]; } - filter->fingerprints[ki.index] = val; + filter->fingerprints[ki.index] = (uint8_t)val; } free(sets); @@ -909,7 +910,7 @@ static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_ while (true) { iterations ++; if(iterations == XOR_SORT_ITERATIONS) { - size = xor_sort_and_remove_dup(keys, size); + size = (uint32_t)xor_sort_and_remove_dup(keys, size); } if(iterations > XOR_MAX_ITERATIONS) { // The probability of this happening is lower than the @@ -941,7 +942,7 @@ static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_ size_t Q0size = 0, Q1size = 0, Q2size = 0; for (size_t i = 0; i < filter->blockLength; i++) { if 
(sets0[i].count == 1) { - Q0[Q0size].index = i; + Q0[Q0size].index = (uint32_t)i; Q0[Q0size].hash = sets0[i].xormask; Q0size++; } @@ -949,14 +950,14 @@ static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_ for (size_t i = 0; i < filter->blockLength; i++) { if (sets1[i].count == 1) { - Q1[Q1size].index = i; + Q1[Q1size].index = (uint32_t)i; Q1[Q1size].hash = sets1[i].xormask; Q1size++; } } for (size_t i = 0; i < filter->blockLength; i++) { if (sets2[i].count == 1) { - Q2[Q2size].index = i; + Q2[Q2size].index = (uint32_t)i; Q2[Q2size].hash = sets2[i].xormask; Q2size++; } @@ -967,7 +968,7 @@ static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_ while (Q0size > 0) { xor_keyindex_t keyindex = Q0[--Q0size]; size_t index = keyindex.index; - xor_make_buffer_current(&buffer0, sets0, index, Q0, &Q0size); + xor_make_buffer_current(&buffer0, sets0, (uint32_t)index, Q0, &Q0size); if (sets0[index].count == 0) continue; // not actually possible after the initial scan. 
@@ -989,7 +990,7 @@ static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_ while (Q1size > 0) { xor_keyindex_t keyindex = Q1[--Q1size]; size_t index = keyindex.index; - xor_make_buffer_current(&buffer1, sets1, index, Q1, &Q1size); + xor_make_buffer_current(&buffer1, sets1, (uint32_t)index, Q1, &Q1size); if (sets1[index].count == 0) continue; @@ -997,7 +998,7 @@ static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_ uint64_t hash = keyindex.hash; uint32_t h0 = xor16_get_h0(hash, filter); uint32_t h2 = xor16_get_h2(hash, filter); - keyindex.index += blockLength; + keyindex.index += (uint32_t)blockLength; stack[stack_size] = keyindex; stack_size++; xor_buffered_decrement_counter(h0, hash, &buffer0, sets0, Q0, &Q0size); @@ -1009,7 +1010,7 @@ static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_ while (Q2size > 0) { xor_keyindex_t keyindex = Q2[--Q2size]; size_t index = keyindex.index; - xor_make_buffer_current(&buffer2, sets2, index, Q2, &Q2size); + xor_make_buffer_current(&buffer2, sets2, (uint32_t)index, Q2, &Q2size); if (sets2[index].count == 0) continue; @@ -1018,7 +1019,7 @@ static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_ uint32_t h0 = xor16_get_h0(hash, filter); uint32_t h1 = xor16_get_h1(hash, filter); - keyindex.index += 2 * blockLength; + keyindex.index += 2 * (uint32_t)blockLength; stack[stack_size] = keyindex; stack_size++; @@ -1051,13 +1052,13 @@ static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_ xor_keyindex_t ki = stack[--stack_size]; uint64_t val = xor_fingerprint(ki.hash); if(ki.index < blockLength) { - val ^= fingerprints1[xor16_get_h1(ki.hash,filter)] ^ fingerprints2[xor16_get_h2(ki.hash,filter)]; + val ^= (uint32_t)fingerprints1[xor16_get_h1(ki.hash,filter)] ^ fingerprints2[xor16_get_h2(ki.hash,filter)]; } else if(ki.index < 2 * blockLength) { - val ^= fingerprints0[xor16_get_h0(ki.hash,filter)] ^ 
fingerprints2[xor16_get_h2(ki.hash,filter)]; + val ^= (uint32_t)fingerprints0[xor16_get_h0(ki.hash,filter)] ^ fingerprints2[xor16_get_h2(ki.hash,filter)]; } else { - val ^= fingerprints0[xor16_get_h0(ki.hash,filter)] ^ fingerprints1[xor16_get_h1(ki.hash,filter)]; + val ^= (uint32_t)fingerprints0[xor16_get_h0(ki.hash,filter)] ^ fingerprints1[xor16_get_h1(ki.hash,filter)]; } - filter->fingerprints[ki.index] = val; + filter->fingerprints[ki.index] = (uint16_t)val; } xor_free_buffer(&buffer0); xor_free_buffer(&buffer1); @@ -1111,7 +1112,7 @@ static inline bool xor16_populate(uint64_t *keys, uint32_t size, xor16_t *filter while (true) { iterations ++; if(iterations == XOR_SORT_ITERATIONS) { - size = xor_sort_and_remove_dup(keys, size); + size = (uint32_t)xor_sort_and_remove_dup(keys, size); } if(iterations > XOR_MAX_ITERATIONS) { // The probability of this happening is lower than the @@ -1138,7 +1139,7 @@ static inline bool xor16_populate(uint64_t *keys, uint32_t size, xor16_t *filter size_t Q0size = 0, Q1size = 0, Q2size = 0; for (size_t i = 0; i < filter->blockLength; i++) { if (sets0[i].count == 1) { - Q0[Q0size].index = i; + Q0[Q0size].index = (uint32_t)i; Q0[Q0size].hash = sets0[i].xormask; Q0size++; } @@ -1146,14 +1147,14 @@ static inline bool xor16_populate(uint64_t *keys, uint32_t size, xor16_t *filter for (size_t i = 0; i < filter->blockLength; i++) { if (sets1[i].count == 1) { - Q1[Q1size].index = i; + Q1[Q1size].index = (uint32_t)i; Q1[Q1size].hash = sets1[i].xormask; Q1size++; } } for (size_t i = 0; i < filter->blockLength; i++) { if (sets2[i].count == 1) { - Q2[Q2size].index = i; + Q2[Q2size].index = (uint32_t)i; Q2[Q2size].hash = sets2[i].xormask; Q2size++; } @@ -1197,7 +1198,7 @@ static inline bool xor16_populate(uint64_t *keys, uint32_t size, xor16_t *filter uint64_t hash = keyindex.hash; uint32_t h0 = xor16_get_h0(hash, filter); uint32_t h2 = xor16_get_h2(hash, filter); - keyindex.index += blockLength; + keyindex.index += (uint32_t)blockLength; 
stack[stack_size] = keyindex; stack_size++; sets0[h0].xormask ^= hash; @@ -1226,7 +1227,7 @@ static inline bool xor16_populate(uint64_t *keys, uint32_t size, xor16_t *filter uint32_t h0 = xor16_get_h0(hash, filter); uint32_t h1 = xor16_get_h1(hash, filter); - keyindex.index += 2 * blockLength; + keyindex.index += 2 * (uint32_t)blockLength; stack[stack_size] = keyindex; stack_size++; @@ -1263,13 +1264,13 @@ static inline bool xor16_populate(uint64_t *keys, uint32_t size, xor16_t *filter xor_keyindex_t ki = stack[--stack_size]; uint64_t val = xor_fingerprint(ki.hash); if(ki.index < blockLength) { - val ^= fingerprints1[xor16_get_h1(ki.hash,filter)] ^ fingerprints2[xor16_get_h2(ki.hash,filter)]; + val ^= (uint32_t)fingerprints1[xor16_get_h1(ki.hash,filter)] ^ fingerprints2[xor16_get_h2(ki.hash,filter)]; } else if(ki.index < 2 * blockLength) { - val ^= fingerprints0[xor16_get_h0(ki.hash,filter)] ^ fingerprints2[xor16_get_h2(ki.hash,filter)]; + val ^= (uint32_t)fingerprints0[xor16_get_h0(ki.hash,filter)] ^ fingerprints2[xor16_get_h2(ki.hash,filter)]; } else { - val ^= fingerprints0[xor16_get_h0(ki.hash,filter)] ^ fingerprints1[xor16_get_h1(ki.hash,filter)]; + val ^= (uint32_t)fingerprints0[xor16_get_h0(ki.hash,filter)] ^ fingerprints1[xor16_get_h1(ki.hash,filter)]; } - filter->fingerprints[ki.index] = val; + filter->fingerprints[ki.index] = (uint16_t)val; } free(sets); diff --git a/tests/unit.c b/tests/unit.c index e5c2a62..3a30b96 100644 --- a/tests/unit.c +++ b/tests/unit.c @@ -2,400 +2,242 @@ #include "xorfilter.h" #include -bool testbufferedxor8(size_t size) { - printf("testing buffered xor8\n"); - - xor8_t filter; - xor8_allocate(size, &filter); +// generic function dispatch + +bool gen_xor8_allocate(uint32_t size, void *filter) { return xor8_allocate(size, filter); } +bool gen_xor16_allocate(uint32_t size, void *filter) { return xor16_allocate(size, filter); } +bool gen_binary_fuse8_allocate(uint32_t size, void *filter) { return binary_fuse8_allocate(size, 
filter); } +bool gen_binary_fuse16_allocate(uint32_t size, void *filter) { return binary_fuse16_allocate(size, filter); } + +void gen_xor8_free(void *filter) { xor8_free(filter); } +void gen_xor16_free(void *filter) { xor16_free(filter); } +void gen_binary_fuse8_free(void *filter) { binary_fuse8_free(filter); } +void gen_binary_fuse16_free(void *filter) { binary_fuse16_free(filter); } + +size_t gen_xor8_size_in_bytes(const void *filter) { return xor8_size_in_bytes(filter); } +size_t gen_xor16_size_in_bytes(const void *filter) { return xor16_size_in_bytes(filter); } +size_t gen_binary_fuse8_size_in_bytes(const void *filter) { return binary_fuse8_size_in_bytes(filter); } +size_t gen_binary_fuse16_size_in_bytes(const void *filter) { return binary_fuse16_size_in_bytes(filter); } + +size_t gen_xor8_serialization_bytes(void *filter) { return xor8_serialization_bytes(filter); } +size_t gen_xor16_serialization_bytes(void *filter) { return xor16_serialization_bytes(filter); } +size_t gen_binary_fuse8_serialization_bytes(void *filter) { return binary_fuse8_serialization_bytes(filter); } +size_t gen_binary_fuse16_serialization_bytes(void *filter) { return binary_fuse16_serialization_bytes(filter); } + +void gen_xor8_serialize(void *filter, char *buffer) { xor8_serialize(filter, buffer); } +void gen_xor16_serialize(void *filter, char *buffer) { xor16_serialize(filter, buffer); } +void gen_binary_fuse8_serialize(void *filter, char *buffer) { binary_fuse8_serialize(filter, buffer); } +void gen_binary_fuse16_serialize(void *filter, char *buffer) { binary_fuse16_serialize(filter, buffer); } + +bool gen_xor8_deserialize(void *filter, const char *buffer) { return xor8_deserialize(filter, buffer); } +bool gen_xor16_deserialize(void *filter, const char *buffer) { return xor16_deserialize(filter, buffer); } +bool gen_binary_fuse8_deserialize(void *filter, const char *buffer) { return binary_fuse8_deserialize(filter, buffer); } +bool gen_binary_fuse16_deserialize(void *filter, const 
char *buffer) { return binary_fuse16_deserialize(filter, buffer); } + +bool gen_xor8_populate(uint64_t *keys, uint32_t size, void *filter) { return xor8_populate(keys, size, filter); } +bool gen_xor8_buffered_populate(uint64_t *keys, uint32_t size, void *filter) { return xor8_buffered_populate(keys, size, filter); } +bool gen_xor16_populate(uint64_t *keys, uint32_t size, void *filter) { return xor16_populate(keys, size, filter); } +bool gen_xor16_buffered_populate(uint64_t *keys, uint32_t size, void *filter) { return xor16_buffered_populate(keys, size, filter); } +bool gen_binary_fuse8_populate(uint64_t *keys, uint32_t size, void *filter) { return binary_fuse8_populate(keys, size, filter); } +bool gen_binary_fuse16_populate(uint64_t *keys, uint32_t size, void *filter) { return binary_fuse16_populate(keys, size, filter); } + +bool gen_xor8_contain(uint64_t key, const void *filter) { return xor8_contain(key, filter); } +bool gen_xor16_contain(uint64_t key, const void *filter) { return xor16_contain(key, filter); } +bool gen_binary_fuse8_contain(uint64_t key, const void *filter) { return binary_fuse8_contain(key, filter); } +bool gen_binary_fuse16_contain(uint64_t key, const void *filter) { return binary_fuse16_contain(key, filter); } + +typedef bool (*allocate_fpt)(uint32_t size, void *filter); +typedef void (*free_fpt)(void *filter); +typedef size_t (*size_in_bytes_fpt)(const void *filter); +typedef size_t (*serialization_bytes_fpt)(void *filter); +typedef void (*serialize_fpt)(void *filter, char *buffer); +typedef bool (*deserialize_fpt)(void *filter, const char *buffer); +typedef bool (*populate_fpt)(uint64_t *keys, uint32_t size, void *filter); +typedef bool (*contain_fpt)(uint64_t key, const void *filter); + +// generic test runner + +bool test(size_t size, size_t repeated_size, void *filter, + allocate_fpt allocate, + free_fpt free_filter, + size_in_bytes_fpt size_in_bytes, + serialization_bytes_fpt serialization_bytes, + serialize_fpt serialize, + 
deserialize_fpt deserialize, + populate_fpt populate, + contain_fpt contain) { + allocate((uint32_t)size, filter); // we need some set of values uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); - for (size_t i = 0; i < size; i++) { + for (size_t i = 0; i < size - repeated_size; i++) { big_set[i] = i; // we use contiguous values } - // we construct the filter - if(!xor8_buffered_populate(big_set, size, &filter)) { return false; } - for (size_t i = 0; i < size; i++) { - if (!xor8_contain(big_set[i], &filter)) { - printf("bug!\n"); - return false; - } - } - - size_t random_matches = 0; - size_t trials = 10000000; - for (size_t i = 0; i < trials; i++) { - uint64_t random_key = ((uint64_t)rand() << 32) + rand(); - if (xor8_contain(random_key, &filter)) { - if (random_key >= size) { - random_matches++; - } - } - } - double fpp = random_matches * 1.0 / trials; - printf(" fpp %3.5f (estimated) \n", fpp); - double bpe = xor8_size_in_bytes(&filter) * 8.0 / size; - printf(" bits per entry %3.2f\n", bpe); - printf(" bits per entry %3.2f (theoretical lower bound)\n", - log(fpp)/log(2)); - printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2))); - - xor8_free(&filter); - free(big_set); - return true; -} - - -bool testxor8(size_t size) { - printf("testing xor8\n"); - - xor8_t filter; - xor8_allocate(size, &filter); - // we need some set of values - uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); - for (size_t i = 0; i < size; i++) { - big_set[i] = i; // we use contiguous values + for (size_t i = 0; i < repeated_size; i++) { + big_set[size - i - 1] = i; // we use contiguous values } // we construct the filter - if(!xor8_populate(big_set, size, &filter)) { return false; } + if(!populate(big_set, (uint32_t)size, filter)) { return false; } for (size_t i = 0; i < size; i++) { - if (!xor8_contain(big_set[i], &filter)) { + if (!contain(big_set[i], filter)) { printf("bug!\n"); return false; } } - size_t buffer_size = 
xor8_serialization_bytes(&filter); + size_t buffer_size = serialization_bytes(filter); char *buffer = (char*)malloc(buffer_size); - xor8_serialize(&filter, buffer); - xor8_free(&filter); - xor8_deserialize(&filter, buffer); + serialize(filter, buffer); + free_filter(filter); + deserialize(filter, buffer); free(buffer); for (size_t i = 0; i < size; i++) { - if (!xor8_contain(big_set[i], &filter)) { + if (!contain(big_set[i], filter)) { printf("bug!\n"); return false; } } - + size_t random_matches = 0; size_t trials = 10000000; for (size_t i = 0; i < trials; i++) { - uint64_t random_key = ((uint64_t)rand() << 32) + rand(); - if (xor8_contain(random_key, &filter)) { + uint64_t random_key = ((uint64_t)rand() << 32U) + (uint64_t)rand(); + if (contain(random_key, filter)) { if (random_key >= size) { random_matches++; } } } - double fpp = random_matches * 1.0 / trials; + double fpp = (double)random_matches * 1.0 / (double)trials; printf(" fpp %3.5f (estimated) \n", fpp); - double bpe = xor8_size_in_bytes(&filter) * 8.0 / size; + double bpe = (double)size_in_bytes(filter) * 8.0 / (double)size; printf(" bits per entry %3.2f\n", bpe); printf(" bits per entry %3.2f (theoretical lower bound)\n", - log(fpp)/log(2)); printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2))); - xor8_free(&filter); + free_filter(filter); free(big_set); return true; } -bool testxor16(size_t size) { - printf("testing xor16\n"); - xor16_t filter; - xor16_allocate(size, &filter); - // we need some set of values - uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); - for (size_t i = 0; i < size; i++) { - big_set[i] = i; // we use contiguous values - } - // we construct the filter - if(!xor16_populate(big_set, size, &filter)) { return false; } - for (size_t i = 0; i < size; i++) { - if (!xor16_contain(big_set[i], &filter)) { - printf("bug!\n"); - return false; - } - } +bool testbufferedxor8(size_t size) { + printf("testing buffered xor8\n"); + xor8_t filter = {0}; // zero 
initialisation silences uninitialized warning + return test(size, 0, &filter, + gen_xor8_allocate, + gen_xor8_free, + gen_xor8_size_in_bytes, + gen_xor8_serialization_bytes, + gen_xor8_serialize, + gen_xor8_deserialize, + gen_xor8_buffered_populate, + gen_xor8_contain); } - size_t buffer_size = xor16_serialization_bytes(&filter); - char *buffer = (char*)malloc(buffer_size); - xor16_serialize(&filter, buffer); - xor16_free(&filter); - xor16_deserialize(&filter, buffer); - free(buffer); - for (size_t i = 0; i < size; i++) { - if (!xor16_contain(big_set[i], &filter)) { - printf("bug!\n"); - return false; - } - } - size_t random_matches = 0; - size_t trials = 10000000; - for (size_t i = 0; i < trials; i++) { - uint64_t random_key = ((uint64_t)rand() << 32) + rand(); - if (xor16_contain(random_key, &filter)) { - if (random_key >= size) { - random_matches++; - } - } - } - double fpp = random_matches * 1.0 / trials; - printf(" fpp %3.5f (estimated) \n", fpp); - double bpe = xor16_size_in_bytes(&filter) * 8.0 / size; - printf(" bits per entry %3.2f\n", bpe); - printf(" bits per entry %3.2f (theoretical lower bound)\n", - log(fpp)/log(2)); - printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2))); - xor16_free(&filter); - free(big_set); - return true; +bool testxor8(size_t size) { + printf("testing xor8\n"); + + xor8_t filter = {0}; // zero initialisation silences uninitialized warning + return test(size, 0, &filter, + gen_xor8_allocate, + gen_xor8_free, + gen_xor8_size_in_bytes, + gen_xor8_serialization_bytes, + gen_xor8_serialize, + gen_xor8_deserialize, + gen_xor8_populate, + gen_xor8_contain); } +bool testxor16(size_t size) { + printf("testing xor16\n"); + xor16_t filter = {0}; // zero initialisation silences uninitialized warning + return test(size, 0, &filter, + gen_xor16_allocate, + gen_xor16_free, + gen_xor16_size_in_bytes, + gen_xor16_serialization_bytes, + gen_xor16_serialize, + gen_xor16_deserialize, + gen_xor16_populate, + gen_xor16_contain); } -bool 
testbufferedxor16(size_t size) { - printf("testing buffered xor16\n"); - xor16_t filter; - xor16_allocate(size, &filter); - // we need some set of values - uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); - for (size_t i = 0; i < size; i++) { - big_set[i] = i; // we use contiguous values - } - // we construct the filter - if(!xor16_buffered_populate(big_set, size, &filter)) { return false; } - for (size_t i = 0; i < size; i++) { - if (!xor16_contain(big_set[i], &filter)) { - printf("bug!\n"); - return false; - } - } - size_t buffer_size = xor16_serialization_bytes(&filter); - char *buffer = (char*)malloc(buffer_size); - xor16_serialize(&filter, buffer); - xor16_free(&filter); - xor16_deserialize(&filter, buffer); - free(buffer); - for (size_t i = 0; i < size; i++) { - if (!xor16_contain(big_set[i], &filter)) { - printf("bug!\n"); - return false; - } - } - size_t random_matches = 0; - size_t trials = 10000000; - for (size_t i = 0; i < trials; i++) { - uint64_t random_key = ((uint64_t)rand() << 32) + rand(); - if (xor16_contain(random_key, &filter)) { - if (random_key >= size) { - random_matches++; - } - } - } - double fpp = random_matches * 1.0 / trials; - printf(" fpp %3.5f (estimated) \n", fpp); - double bpe = xor16_size_in_bytes(&filter) * 8.0 / size; - printf(" bits per entry %3.2f\n", bpe); - printf(" bits per entry %3.2f (theoretical lower bound)\n", - log(fpp)/log(2)); - printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2))); - xor16_free(&filter); - free(big_set); - return true; +bool testbufferedxor16(size_t size) { + printf("testing buffered xor16\n"); + xor16_t filter = {0}; // zero initialisation silences uninitialized warning + return test(size, 0, &filter, + gen_xor16_allocate, + gen_xor16_free, + gen_xor16_size_in_bytes, + gen_xor16_serialization_bytes, + gen_xor16_serialize, + gen_xor16_deserialize, + gen_xor16_buffered_populate, + gen_xor16_contain); } bool testbinaryfuse8(size_t size) { printf("testing binary fuse8 with size
%zu\n", size); - binary_fuse8_t filter; - binary_fuse8_allocate(size, &filter); - // we need some set of values - uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); - for (size_t i = 0; i < size; i++) { - big_set[i] = i; // we use contiguous values - } - // we construct the filter - if(!binary_fuse8_populate(big_set, size, &filter)) { printf("failure to populate\n"); return false; } - for (size_t i = 0; i < size; i++) { - if (!binary_fuse8_contain(big_set[i], &filter)) { - printf("bug!\n"); - return false; - } - } - - size_t buffer_size = binary_fuse8_serialization_bytes(&filter); - char *buffer = (char*)malloc(buffer_size); - binary_fuse8_serialize(&filter, buffer); - binary_fuse8_free(&filter); - binary_fuse8_deserialize(&filter, buffer); - free(buffer); - for (size_t i = 0; i < size; i++) { - if (!binary_fuse8_contain(big_set[i], &filter)) { - printf("bug!\n"); - return false; - } - } - - size_t random_matches = 0; - size_t trials = 10000000; - for (size_t i = 0; i < trials; i++) { - uint64_t random_key = ((uint64_t)rand() << 32) + rand(); - if (binary_fuse8_contain(random_key, &filter)) { - if (random_key >= size) { - random_matches++; - } - } - } - double fpp = random_matches * 1.0 / trials; - printf(" fpp %3.5f (estimated) \n", fpp); - double bpe = binary_fuse8_size_in_bytes(&filter) * 8.0 / size; - printf(" bits per entry %3.2f\n", bpe); - printf(" bits per entry %3.2f (theoretical lower bound)\n", - log(fpp)/log(2)); - printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2))); - binary_fuse8_free(&filter); - free(big_set); - return true; + binary_fuse8_t filter = {0}; // zero initialisation silences uninitialized warning + return test(size, 0, &filter, + gen_binary_fuse8_allocate, + gen_binary_fuse8_free, + gen_binary_fuse8_size_in_bytes, + gen_binary_fuse8_serialization_bytes, + gen_binary_fuse8_serialize, + gen_binary_fuse8_deserialize, + gen_binary_fuse8_populate, + gen_binary_fuse8_contain); } bool testbinaryfuse16(size_t size) {
printf("testing binary fuse16\n"); - binary_fuse16_t filter; - binary_fuse16_allocate(size, &filter); - // we need some set of values - uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); - for (size_t i = 0; i < size; i++) { - big_set[i] = i; // we use contiguous values - } - // we construct the filter - if(!binary_fuse16_populate(big_set, size, &filter)) { printf("failure to populate\n"); return false; } - for (size_t i = 0; i < size; i++) { - if (!binary_fuse16_contain(big_set[i], &filter)) { - printf("bug!\n"); - return false; - } - } - - size_t buffer_size = binary_fuse16_serialization_bytes(&filter); - char *buffer = (char*)malloc(buffer_size); - binary_fuse16_serialize(&filter, buffer); - binary_fuse16_free(&filter); - binary_fuse16_deserialize(&filter, buffer); - free(buffer); - for (size_t i = 0; i < size; i++) { - if (!binary_fuse16_contain(big_set[i], &filter)) { - printf("bug!\n"); - return false; - } - } - - size_t random_matches = 0; - size_t trials = 10000000; - for (size_t i = 0; i < trials; i++) { - uint64_t random_key = ((uint64_t)rand() << 32) + rand(); - if (binary_fuse16_contain(random_key, &filter)) { - if (random_key >= size) { - random_matches++; - } - } - } - double fpp = random_matches * 1.0 / trials; - printf(" fpp %3.5f (estimated) \n", fpp); - double bpe = binary_fuse16_size_in_bytes(&filter) * 8.0 / size; - printf(" bits per entry %3.2f\n", bpe); - printf(" bits per entry %3.2f (theoretical lower bound)\n", - log(fpp)/log(2)); - printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2))); - binary_fuse16_free(&filter); - free(big_set); - return true; + binary_fuse16_t filter = {0}; // zero initialisation silences uninitialized warning + return test(size, 0, &filter, + gen_binary_fuse16_allocate, + gen_binary_fuse16_free, + gen_binary_fuse16_size_in_bytes, + gen_binary_fuse16_serialization_bytes, + gen_binary_fuse16_serialize, + gen_binary_fuse16_deserialize, + gen_binary_fuse16_populate, + gen_binary_fuse16_contain); }
bool testbinaryfuse8_dup(size_t size) { printf("testing binary fuse8 with duplicates\n"); - binary_fuse8_t filter; - binary_fuse8_allocate(size, &filter); - // we need some set of values - uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); - size_t repeated_size = 10; - for (size_t i = 0; i < size - repeated_size; i++) { - big_set[i] = i; // we use contiguous values - } - for (size_t i = 0; i < repeated_size; i++) { - big_set[size - i - 1] = i; // we use contiguous values - } - // we construct the filter - if(!binary_fuse8_populate(big_set, size, &filter)) { return false; } - for (size_t i = 0; i < size; i++) { - if (!binary_fuse8_contain(big_set[i], &filter)) { - printf("bug!\n"); - return false; - } - } - - size_t random_matches = 0; - size_t trials = 10000000; - for (size_t i = 0; i < trials; i++) { - uint64_t random_key = ((uint64_t)rand() << 32) + rand(); - if (binary_fuse8_contain(random_key, &filter)) { - if (random_key >= size) { - random_matches++; - } - } - } - double fpp = random_matches * 1.0 / trials; - printf(" fpp %3.5f (estimated) \n", fpp); - double bpe = binary_fuse8_size_in_bytes(&filter) * 8.0 / size; - printf(" bits per entry %3.2f\n", bpe); - printf(" bits per entry %3.2f (theoretical lower bound)\n", - log(fpp)/log(2)); - printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2))); - binary_fuse8_free(&filter); - free(big_set); - return true; + binary_fuse8_t filter = {0}; // zero initialisation silences uninitialized warning + return test(size, 10, &filter, + gen_binary_fuse8_allocate, + gen_binary_fuse8_free, + gen_binary_fuse8_size_in_bytes, + gen_binary_fuse8_serialization_bytes, + gen_binary_fuse8_serialize, + gen_binary_fuse8_deserialize, + gen_binary_fuse8_populate, + gen_binary_fuse8_contain); } bool testbinaryfuse16_dup(size_t size) { printf("testing binary fuse16 with duplicates\n"); - binary_fuse16_t filter; - binary_fuse16_allocate(size, &filter); - // we need some set of values - uint64_t *big_set = (uint64_t
*)malloc(sizeof(uint64_t) * size); - size_t repeated_size = 10; - for (size_t i = 0; i < size - repeated_size; i++) { - big_set[i] = i; // we use contiguous values - } - for (size_t i = 0; i < repeated_size; i++) { - big_set[size - i - 1] = i; // we use contiguous values - } - // we construct the filter - if(!binary_fuse16_populate(big_set, size, &filter)) { return false; } - for (size_t i = 0; i < size; i++) { - if (!binary_fuse16_contain(big_set[i], &filter)) { - printf("bug!\n"); - return false; - } - } - - size_t random_matches = 0; - size_t trials = 10000000; - for (size_t i = 0; i < trials; i++) { - uint64_t random_key = ((uint64_t)rand() << 32) + rand(); - if (binary_fuse16_contain(random_key, &filter)) { - if (random_key >= size) { - random_matches++; - } - } - } - double fpp = random_matches * 1.0 / trials; - printf(" fpp %3.5f (estimated) \n", fpp); - double bpe = binary_fuse16_size_in_bytes(&filter) * 8.0 / size; - printf(" bits per entry %3.2f\n", bpe); - printf(" bits per entry %3.2f (theoretical lower bound)\n", - log(fpp)/log(2)); - printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2))); - binary_fuse16_free(&filter); - free(big_set); - return true; + binary_fuse16_t filter = {0}; // zero initialisation silences uninitialized warning + return test(size, 10, &filter, + gen_binary_fuse16_allocate, + gen_binary_fuse16_free, + gen_binary_fuse16_size_in_bytes, + gen_binary_fuse16_serialization_bytes, + gen_binary_fuse16_serialize, + gen_binary_fuse16_deserialize, + gen_binary_fuse16_populate, + gen_binary_fuse16_contain); } void failure_rate_binary_fuse16() { @@ -404,19 +246,20 @@ void failure_rate_binary_fuse16() { size_t size = 5000; uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); binary_fuse16_t filter; - binary_fuse16_allocate(size, &filter); + binary_fuse16_allocate((uint32_t)size, &filter); size_t failure = 0; size_t total_trials = 1000000; for(size_t trial = 0; trial <= 1000; trial++) { for (size_t i = 0; i < size; i++) { -
big_set[i] = rand() + (((uint64_t) rand()) << 32); + big_set[i] = (uint64_t)rand() + (((uint64_t) rand()) << 32U); } - if(!binary_fuse16_populate(big_set, size, &filter)) { + if(!binary_fuse16_populate(big_set, (uint32_t)size, &filter)) { failure++; } } printf("failures %zu out of %zu\n\n", failure, total_trials); + binary_fuse16_free(&filter); free(big_set); } @@ -450,4 +293,4 @@ int main() { if(!testbinaryfuse16(0)) { abort(); } if(!testbinaryfuse16(1)) { abort(); } if(!testbinaryfuse16(2)) { abort(); } -} \ No newline at end of file +}