Skip to content

Commit

Permalink
removing unnecessary boiler plate in unit.c (#65)
Browse files Browse the repository at this point in the history
* first cut at reducing warnings

binaryfusefilter.h only

issues addressed

1. changed some return value and parameter types of (static) functions
-- PLEASE CHECK THIS IN REVIEW

2. sprinkled 'U' into bitwise operations to silence warnings

3. casting to avoid "standard integer promotion rules" which resulted
in signedness warnings

4. explicitly reducing results to the target type rather than letting
it happen implicitly

tests still passing

* first cut at reducing warnings

binaryfusefilter.h only

issues addressed

1. changed some return value and parameter types of (static) functions
-- PLEASE CHECK THIS IN REVIEW

2. sprinkled 'U' into bitwise operations to silence warnings

3. casting to avoid "standard integer promotion rules" which resulted
in signedness warnings

4. explicitly reducing results to the target type rather than letting
it happen implicitly

5. when and `if` statements ends in break or return, then a following
`else if` can be just a new `if`

tests still passing

* starting work on xofilter.h

* binclude/binaryfusefilter.h apparently clean for first time

* formatting

* first cut on xofilter.h

mostly casting size_t down to uint32_t - maybe some internal struct
types should have been size_t?

also some integer promotion casts

* round2 on xorfilter.h

mostly casting blocklengt to uint32_t to fit into keyindex.index

should keyindex.index be a size_t?

* bench.c and unit.c

very repetitive casting of mainly sizes and doubles.

* all silent now on a clean compile

with -Wconversion and -Wsign-conversion

so putting these in the Makefile, so during "private" development with
the Makefile new warnings will be noticed straight away

but not in CMakeLists.txt, because as this is a header-only INTERFACE
library, it would force these warning levels on the users.

* another sweep from including c++ project

turned up these additional 'U' tweaks

* mistaken cast which broke test

* factoring out the report functionality

all sections were indentical except for the call to *contain()
and *size_in_bytes

some void* and function pointer juggling allowed to make this generic

report code reduced by 2/3rds

* iron out slight inconsistencies between tests

* abstracting away the rest of the test logic

for all but the special "failure rate test"

the large function dispatch table is a litle annoying, but can be
removed as well...TBC

tests all pass

* fixing a memory leak caught by sanitizer

just a missing free()

* _duplicates test cases can be convered by the same code

remove initialization. it's not needed and compiler now happy

* removing the need for large array of boiler plate function wrappers

instead of having a wrapper function per action per filter type, we
can cast the functions as a generic function pointer on the into the
generic test runner, and then cast them as a compatible function
pointer type inside the test runner.

The generic `filter*` parameter cannot be `void*` and must be a
dummy struct because: § 6.2.5..28: "All pointers to structure types shall have the
same representation and alignment requirements as each other"  (the
same is not true for `void*` which may have different representation.

This simple change results in a large code reduction and removes the
unsightly and hard to remove array of boiler plate function wrappers.
  • Loading branch information
oschonrock authored Dec 14, 2024
1 parent 890b31d commit 872a8bb
Showing 1 changed file with 104 additions and 175 deletions.
279 changes: 104 additions & 175 deletions tests/unit.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,71 +2,33 @@
#include "xorfilter.h"
#include <assert.h>

// generic function dispatch

bool gen_xor8_allocate(uint32_t size, void *filter) { return xor8_allocate(size, filter); }
bool gen_xor16_allocate(uint32_t size, void *filter) { return xor16_allocate(size, filter); }
bool gen_binary_fuse8_allocate(uint32_t size, void *filter) { return binary_fuse8_allocate(size, filter); }
bool gen_binary_fuse16_allocate(uint32_t size, void *filter) { return binary_fuse16_allocate(size, filter); }

void gen_xor8_free(void *filter) { xor8_free(filter); }
void gen_xor16_free(void *filter) { xor16_free(filter); }
void gen_binary_fuse8_free(void *filter) { binary_fuse8_free(filter); }
void gen_binary_fuse16_free(void *filter) { binary_fuse16_free(filter); }

size_t gen_xor8_size_in_bytes(const void *filter) { return xor8_size_in_bytes(filter); }
size_t gen_xor16_size_in_bytes(const void *filter) { return xor16_size_in_bytes(filter); }
size_t gen_binary_fuse8_size_in_bytes(const void *filter) { return binary_fuse8_size_in_bytes(filter); }
size_t gen_binary_fuse16_size_in_bytes(const void *filter) { return binary_fuse16_size_in_bytes(filter); }

size_t gen_xor8_serialization_bytes(void *filter) { return xor8_serialization_bytes(filter); }
size_t gen_xor16_serialization_bytes(void *filter) { return xor16_serialization_bytes(filter); }
size_t gen_binary_fuse8_serialization_bytes(void *filter) { return binary_fuse8_serialization_bytes(filter); }
size_t gen_binary_fuse16_serialization_bytes(void *filter) { return binary_fuse16_serialization_bytes(filter); }

void gen_xor8_serialize(void *filter, char *buffer) { xor8_serialize(filter, buffer); }
void gen_xor16_serialize(void *filter, char *buffer) { xor16_serialize(filter, buffer); }
void gen_binary_fuse8_serialize(void *filter, char *buffer) { binary_fuse8_serialize(filter, buffer); }
void gen_binary_fuse16_serialize(void *filter, char *buffer) { binary_fuse16_serialize(filter, buffer); }

bool gen_xor8_deserialize(void *filter, const char *buffer) { return xor8_deserialize(filter, buffer); }
bool gen_xor16_deserialize(void *filter, const char *buffer) { return xor16_deserialize(filter, buffer); }
bool gen_binary_fuse8_deserialize(void *filter, const char *buffer) { return binary_fuse8_deserialize(filter, buffer); }
bool gen_binary_fuse16_deserialize(void *filter, const char *buffer) { return binary_fuse16_deserialize(filter, buffer); }

bool gen_xor8_populate(uint64_t *keys, uint32_t size, void *filter) { return xor8_populate(keys, size, filter); }
bool gen_xor8_buffered_populate(uint64_t *keys, uint32_t size, void *filter) { return xor8_buffered_populate(keys, size, filter); }
bool gen_xor16_populate(uint64_t *keys, uint32_t size, void *filter) { return xor16_populate(keys, size, filter); }
bool gen_xor16_buffered_populate(uint64_t *keys, uint32_t size, void *filter) { return xor16_buffered_populate(keys, size, filter); }
bool gen_binary_fuse8_populate(uint64_t *keys, uint32_t size, void *filter) { return binary_fuse8_populate(keys, size, filter); }
bool gen_binary_fuse16_populate(uint64_t *keys, uint32_t size, void *filter) { return binary_fuse16_populate(keys, size, filter); }

bool gen_xor8_contain(uint64_t key, const void *filter) { return xor8_contain(key, filter); }
bool gen_xor16_contain(uint64_t key, const void *filter) { return xor16_contain(key, filter); }
bool gen_binary_fuse8_contain(uint64_t key, const void *filter) { return binary_fuse8_contain(key, filter); }
bool gen_binary_fuse16_contain(uint64_t key, const void *filter) { return binary_fuse16_contain(key, filter); }

typedef bool (*allocate_fpt)(uint32_t size, void *filter);
typedef void (*free_fpt)(void *filter);
typedef size_t (*size_in_bytes_fpt)(const void *filter);
typedef size_t (*serialization_bytes_fpt)(void *filter);
typedef void (*serialize_fpt)(void *filter, char *buffer);
typedef bool (*deserialize_fpt)(void *filter, const char *buffer);
typedef bool (*populate_fpt)(uint64_t *keys, uint32_t size, void *filter);
typedef bool (*contain_fpt)(uint64_t key, const void *filter);
// generic proxy for filter, important that this is a struct, not void
// as § 6.2.5..28: "All pointers to structure types shall have the
// same representation and alignment requirements as each other"
typedef struct { int dummy_; } gen_filter;

typedef bool (*allocate_fpt)(uint32_t size, gen_filter *filter);
typedef void (*free_fpt)(gen_filter *filter);
typedef size_t (*size_in_bytes_fpt)(const gen_filter *filter);
typedef size_t (*serialization_bytes_fpt)(gen_filter *filter);
typedef void (*serialize_fpt)(gen_filter *filter, char *buffer);
typedef bool (*deserialize_fpt)(gen_filter *filter, const char *buffer);
typedef bool (*populate_fpt)(uint64_t *keys, uint32_t size, gen_filter *filter);
typedef bool (*contain_fpt)(uint64_t key, const gen_filter *filter);

typedef void (*gfp)(void); // generic function pointer

// generic test runner

bool test(size_t size, size_t repeated_size, void *filter,
allocate_fpt allocate,
free_fpt free_filter,
size_in_bytes_fpt size_in_bytes,
serialization_bytes_fpt serialization_bytes,
serialize_fpt serialize,
deserialize_fpt deserialize,
populate_fpt populate,
contain_fpt contain) {
allocate((uint32_t)size, filter);
gfp allocate,
gfp free_filter,
gfp size_in_bytes,
gfp serialization_bytes,
gfp serialize,
gfp deserialize,
gfp populate,
gfp contain) {
((allocate_fpt)allocate)((uint32_t)size, filter);
// we need some set of values
uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size);
for (size_t i = 0; i < size - repeated_size; i++) {
Expand All @@ -76,22 +38,22 @@ bool test(size_t size, size_t repeated_size, void *filter,
big_set[size - i - 1] = i; // we use contiguous values
}
// we construct the filter
if(!populate(big_set, (uint32_t)size, filter)) { return false; }
if(!((populate_fpt)populate)(big_set, (uint32_t)size, filter)) { return false; }
for (size_t i = 0; i < size; i++) {
if (!contain(big_set[i], filter)) {
if (!((contain_fpt)contain)(big_set[i], filter)) {
printf("bug!\n");
return false;
}
}

size_t buffer_size = serialization_bytes(filter);
size_t buffer_size = ((serialization_bytes_fpt)serialization_bytes)(filter);
char *buffer = (char*)malloc(buffer_size);
serialize(filter, buffer);
free_filter(filter);
deserialize(filter, buffer);
((serialize_fpt)serialize)(filter, buffer);
((free_fpt)free_filter)(filter);
((deserialize_fpt)deserialize)(filter, buffer);
free(buffer);
for (size_t i = 0; i < size; i++) {
if (!contain(big_set[i], filter)) {
if (!((contain_fpt)contain)(big_set[i], filter)) {
printf("bug!\n");
return false;
}
Expand All @@ -101,143 +63,110 @@ bool test(size_t size, size_t repeated_size, void *filter,
size_t trials = 10000000;
for (size_t i = 0; i < trials; i++) {
uint64_t random_key = ((uint64_t)rand() << 32U) + (uint64_t)rand();
if (contain(random_key, filter)) {
if (((contain_fpt)contain)(random_key, filter)) {
if (random_key >= size) {
random_matches++;
}
}
}
double fpp = (double)random_matches * 1.0 / (double)trials;
printf(" fpp %3.5f (estimated) \n", fpp);
double bpe = (double)size_in_bytes(filter) * 8.0 / (double)size;
double bpe = (double)((size_in_bytes_fpt)size_in_bytes)(filter) * 8.0 / (double)size;
printf(" bits per entry %3.2f\n", bpe);
printf(" bits per entry %3.2f (theoretical lower bound)\n", - log(fpp)/log(2));
printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2)));
free_filter(filter);
((free_fpt)free_filter)(filter);
free(big_set);
return true;
}

bool testbufferedxor8(size_t size) {
printf("testing buffered xor8\n");
xor8_t filter = {0}; // zero initialisation silences unitialized warning
xor8_t filter;
return test(size, 0, &filter,
gen_xor8_allocate,
gen_xor8_free,
gen_xor8_size_in_bytes,
gen_xor8_serialization_bytes,
gen_xor8_serialize,
gen_xor8_deserialize,
gen_xor8_buffered_populate,
gen_xor8_contain);
(gfp)xor8_allocate,
(gfp)xor8_free,
(gfp)xor8_size_in_bytes,
(gfp)xor8_serialization_bytes,
(gfp)xor8_serialize,
(gfp)xor8_deserialize,
(gfp)xor8_buffered_populate,
(gfp)xor8_contain);
}


bool testxor8(size_t size) {
printf("testing xor8\n");

xor8_t filter = {0}; // zero initialisation silences unitialized warning
xor8_t filter;
return test(size, 0, &filter,
gen_xor8_allocate,
gen_xor8_free,
gen_xor8_size_in_bytes,
gen_xor8_serialization_bytes,
gen_xor8_serialize,
gen_xor8_deserialize,
gen_xor8_populate,
gen_xor8_contain);
(gfp)xor8_allocate,
(gfp)xor8_free,
(gfp)xor8_size_in_bytes,
(gfp)xor8_serialization_bytes,
(gfp)xor8_serialize,
(gfp)xor8_deserialize,
(gfp)xor8_populate,
(gfp)xor8_contain);
}

bool testxor16(size_t size) {
printf("testing xor16\n");
xor16_t filter = {0}; // zero initialisation silences unitialized warning
xor16_t filter;
return test(size, 0, &filter,
gen_xor16_allocate,
gen_xor16_free,
gen_xor16_size_in_bytes,
gen_xor16_serialization_bytes,
gen_xor16_serialize,
gen_xor16_deserialize,
gen_xor16_populate,
gen_xor16_contain);
(gfp)xor16_allocate,
(gfp)xor16_free,
(gfp)xor16_size_in_bytes,
(gfp)xor16_serialization_bytes,
(gfp)xor16_serialize,
(gfp)xor16_deserialize,
(gfp)xor16_populate,
(gfp)xor16_contain);
}



bool testbufferedxor16(size_t size) {
printf("testing buffered xor16\n");
xor16_t filter = {0}; // zero initialisation silences unitialized warning
xor16_t filter;
return test(size, 0, &filter,
gen_xor16_allocate,
gen_xor16_free,
gen_xor16_size_in_bytes,
gen_xor16_serialization_bytes,
gen_xor16_serialize,
gen_xor16_deserialize,
gen_xor16_buffered_populate,
gen_xor16_contain);
(gfp)xor16_allocate,
(gfp)xor16_free,
(gfp)xor16_size_in_bytes,
(gfp)xor16_serialization_bytes,
(gfp)xor16_serialize,
(gfp)xor16_deserialize,
(gfp)xor16_buffered_populate,
(gfp)xor16_contain);
}

bool testbinaryfuse8(size_t size) {
printf("testing binary fuse8 with size %zu\n", size);
binary_fuse8_t filter = {0}; // zero initialisation silences unitialized warning
return test(size, 0, &filter,
gen_binary_fuse8_allocate,
gen_binary_fuse8_free,
gen_binary_fuse8_size_in_bytes,
gen_binary_fuse8_serialization_bytes,
gen_binary_fuse8_serialize,
gen_binary_fuse8_deserialize,
gen_binary_fuse8_populate,
gen_binary_fuse8_contain);
}



bool testbinaryfuse16(size_t size) {
printf("testing binary fuse16\n");
binary_fuse16_t filter = {0}; // zero initialisation silences unitialized warning
return test(size, 0, &filter,
gen_binary_fuse16_allocate,
gen_binary_fuse16_free,
gen_binary_fuse16_size_in_bytes,
gen_binary_fuse16_serialization_bytes,
gen_binary_fuse16_serialize,
gen_binary_fuse16_deserialize,
gen_binary_fuse16_populate,
gen_binary_fuse16_contain);
bool testbinaryfuse8(size_t size, size_t repeated_size) {
printf("testing binary fuse8 with size %zu and %zu duplicates\n", size, repeated_size);
binary_fuse8_t filter;
return test(size, repeated_size, &filter,
(gfp)binary_fuse8_allocate,
(gfp)binary_fuse8_free,
(gfp)binary_fuse8_size_in_bytes,
(gfp)binary_fuse8_serialization_bytes,
(gfp)binary_fuse8_serialize,
(gfp)binary_fuse8_deserialize,
(gfp)binary_fuse8_populate,
(gfp)binary_fuse8_contain);
}



bool testbinaryfuse8_dup(size_t size) {
printf("testing binary fuse8 with duplicates\n");
binary_fuse8_t filter = {0}; // zero initialisation silences unitialized warning
return test(size, 10, &filter,
gen_binary_fuse8_allocate,
gen_binary_fuse8_free,
gen_binary_fuse8_size_in_bytes,
gen_binary_fuse8_serialization_bytes,
gen_binary_fuse8_serialize,
gen_binary_fuse8_deserialize,
gen_binary_fuse8_populate,
gen_binary_fuse8_contain);
}



bool testbinaryfuse16_dup(size_t size) {
printf("testing binary fuse16 with duplicates\n");
binary_fuse16_t filter = {0}; // zero initialisation silences unitialized warning
return test(size, 10, &filter,
gen_binary_fuse16_allocate,
gen_binary_fuse16_free,
gen_binary_fuse16_size_in_bytes,
gen_binary_fuse16_serialization_bytes,
gen_binary_fuse16_serialize,
gen_binary_fuse16_deserialize,
gen_binary_fuse16_populate,
gen_binary_fuse16_contain);
bool testbinaryfuse16(size_t size, size_t repeated_size) {
printf("testing binary fuse16 with size %zu and %zu duplicates\n", size, repeated_size);
binary_fuse16_t filter;
return test(size, repeated_size, &filter,
(gfp)binary_fuse16_allocate,
(gfp)binary_fuse16_free,
(gfp)binary_fuse16_size_in_bytes,
(gfp)binary_fuse16_serialization_bytes,
(gfp)binary_fuse16_serialize,
(gfp)binary_fuse16_deserialize,
(gfp)binary_fuse16_populate,
(gfp)binary_fuse16_contain);
}

void failure_rate_binary_fuse16() {
Expand Down Expand Up @@ -267,13 +196,13 @@ int main() {
failure_rate_binary_fuse16();
for(size_t size = 1000; size <= 1000000; size *= 300) {
printf("== size = %zu \n", size);
if(!testbinaryfuse8(size)) { abort(); }
if(!testbinaryfuse8(size, 0)) { abort(); }
printf("\n");
if(!testbinaryfuse16(size)) { abort(); }
if(!testbinaryfuse16(size, 0)) { abort(); }
printf("\n");
if(!testbinaryfuse8_dup(size)) { abort(); }
if(!testbinaryfuse8(size, 10)) { abort(); }
printf("\n");
if(!testbinaryfuse16_dup(size)) { abort(); }
if(!testbinaryfuse16(size, 10)) { abort(); }
printf("\n");
if(!testbufferedxor8(size)) { abort(); }
printf("\n");
Expand All @@ -287,10 +216,10 @@ int main() {
}

// test small edge-case binary fuse input sizes
if(!testbinaryfuse8(0)) { abort(); }
if(!testbinaryfuse8(1)) { abort(); }
if(!testbinaryfuse8(2)) { abort(); }
if(!testbinaryfuse16(0)) { abort(); }
if(!testbinaryfuse16(1)) { abort(); }
if(!testbinaryfuse16(2)) { abort(); }
if(!testbinaryfuse8(0, 0)) { abort(); }
if(!testbinaryfuse8(1, 0)) { abort(); }
if(!testbinaryfuse8(2, 0)) { abort(); }
if(!testbinaryfuse16(0, 0)) { abort(); }
if(!testbinaryfuse16(1, 0)) { abort(); }
if(!testbinaryfuse16(2, 0)) { abort(); }
}

0 comments on commit 872a8bb

Please sign in to comment.