diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index fc38e5a..74ce192 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -1,19 +1,34 @@ -name: Ubuntu 22.04 CI (GCC 11) +name: Ubuntu 24.04 (gcc-13, clang-18) on: [push, pull_request] jobs: ubuntu-build: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 + strategy: + matrix: + compiler: [gcc, clang] steps: - - uses: actions/checkout@v3 - - name: Use cmake + - name: checkout code + uses: actions/checkout@v4 + - name: build with cmake run: | + if [ "${{ matrix.compiler }}" == "gcc" ]; then + export CC=gcc + export CXX=g++ + elif [ "${{ matrix.compiler }}" == "clang" ]; then + export CC=clang + export CXX=clang++ + fi mkdir build && cd build && cmake .. -DCMAKE_INSTALL_PREFIX:PATH=destination && cmake --build . && - ctest --output-on-failure && + # force failure if sanitizers report any warnings + env \ + ASAN_OPTIONS='halt_on_error=1:abort_on_error=1:print_summary=1' \ + UBSAN_OPTIONS='halt_on_error=1:abort_on_error=1:print_summary=1:print_stacktrace=1' \ + ctest --output-on-failure && cmake --install . && cd ../tests/installation_tests/find && mkdir build && cd build && cmake -DCMAKE_INSTALL_PREFIX:PATH=../../../build/destination .. && cmake --build . 
diff --git a/Makefile b/Makefile index 8eb4843..0e0f772 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,17 @@ all: unit bench unit : tests/unit.c include/xorfilter.h include/binaryfusefilter.h - cc -std=c99 -O3 -o unit tests/unit.c -lm -Iinclude -Wall -Wextra -Wshadow -Wcast-qual -Wconversion -Wsign-conversion - + ${CC} -std=c99 -g -O2 -fsanitize=address,leak,undefined -o unit tests/unit.c -lm -Iinclude -Wall -Wextra -Wshadow -Wcast-qual -Wconversion -Wsign-conversion -Werror ab : tests/a.c tests/b.c - cc -std=c99 -o c tests/a.c tests/b.c -lm -Iinclude -Wall -Wextra -Wshadow -Wcast-qual -Wconversion -Wsign-conversion + ${CC} -std=c99 -o c tests/a.c tests/b.c -lm -Iinclude -Wall -Wextra -Wshadow -Wcast-qual -Wconversion -Wsign-conversion bench : benchmarks/bench.c include/xorfilter.h include/binaryfusefilter.h - cc -std=c99 -O3 -o bench benchmarks/bench.c -lm -Iinclude -Wall -Wextra -Wshadow -Wcast-qual -Wconversion -Wsign-conversion + ${CC} -std=c99 -O3 -o bench benchmarks/bench.c -lm -Iinclude -Wall -Wextra -Wshadow -Wcast-qual -Wconversion -Wsign-conversion test: unit ab + ASAN_OPTIONS='halt_on_error=1:abort_on_error=1:print_summary=1' \ + UBSAN_OPTIONS='halt_on_error=1:abort_on_error=1:print_summary=1:print_stacktrace=1' \ ./unit clean: diff --git a/include/xorfilter.h b/include/xorfilter.h index 49de6d9..d87ef65 100644 --- a/include/xorfilter.h +++ b/include/xorfilter.h @@ -157,12 +157,12 @@ static inline bool xor16_allocate(uint32_t size, xor16_t *filter) { // report memory usage static inline size_t xor8_size_in_bytes(const xor8_t *filter) { - return 3 * filter->blockLength * sizeof(uint8_t) + sizeof(xor8_t); + return 3 * (size_t)(filter->blockLength) * sizeof(uint8_t) + sizeof(xor8_t); } // report memory usage static inline size_t xor16_size_in_bytes(const xor16_t *filter) { - return 3 * filter->blockLength * sizeof(uint16_t) + sizeof(xor16_t); + return 3 * (size_t)(filter->blockLength) * sizeof(uint16_t) + sizeof(xor16_t); } // release memory @@ -449,9 
+449,9 @@ static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t if(size == 0) { return false; } uint64_t rng_counter = 1; filter->seed = xor_rng_splitmix64(&rng_counter); - size_t arrayLength = filter->blockLength * 3; // size of the backing array + size_t arrayLength = (size_t)(filter->blockLength) * 3; // size of the backing array xor_setbuffer_t buffer0, buffer1, buffer2; - size_t blockLength = filter->blockLength; + size_t blockLength = (size_t)(filter->blockLength); bool ok0 = xor_init_buffer(&buffer0, blockLength); bool ok1 = xor_init_buffer(&buffer1, blockLength); bool ok2 = xor_init_buffer(&buffer2, blockLength); @@ -660,8 +660,8 @@ static inline bool xor8_populate(uint64_t *keys, uint32_t size, xor8_t *filter) if(size == 0) { return false; } uint64_t rng_counter = 1; filter->seed = xor_rng_splitmix64(&rng_counter); - size_t arrayLength = filter->blockLength * 3; // size of the backing array - size_t blockLength = filter->blockLength; + size_t arrayLength = (size_t)(filter->blockLength) * 3; // size of the backing array + size_t blockLength = (size_t)(filter->blockLength); xor_xorset_t *sets = (xor_xorset_t *)malloc(arrayLength * sizeof(xor_xorset_t)); @@ -867,9 +867,9 @@ static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_ if(size == 0) { return false; } uint64_t rng_counter = 1; filter->seed = xor_rng_splitmix64(&rng_counter); - size_t arrayLength = filter->blockLength * 3; // size of the backing array + size_t arrayLength = (size_t)(filter->blockLength) * 3; // size of the backing array xor_setbuffer_t buffer0, buffer1, buffer2; - size_t blockLength = filter->blockLength; + size_t blockLength = (size_t)(filter->blockLength); bool ok0 = xor_init_buffer(&buffer0, blockLength); bool ok1 = xor_init_buffer(&buffer1, blockLength); bool ok2 = xor_init_buffer(&buffer2, blockLength); @@ -1081,8 +1081,8 @@ static inline bool xor16_populate(uint64_t *keys, uint32_t size, xor16_t *filter if(size == 0) { return 
false; } uint64_t rng_counter = 1; filter->seed = xor_rng_splitmix64(&rng_counter); - size_t arrayLength = filter->blockLength * 3; // size of the backing array - size_t blockLength = filter->blockLength; + size_t arrayLength = (size_t)(filter->blockLength) * 3; // size of the backing array + size_t blockLength = (size_t)(filter->blockLength); xor_xorset_t *sets = (xor_xorset_t *)malloc(arrayLength * sizeof(xor_xorset_t)); @@ -1282,12 +1282,12 @@ static inline bool xor16_populate(uint64_t *keys, uint32_t size, xor16_t *filter static inline size_t xor16_serialization_bytes(xor16_t *filter) { return sizeof(filter->seed) + sizeof(filter->blockLength) + - sizeof(uint16_t) * 3 * filter->blockLength; + sizeof(uint16_t) * 3 * (size_t)(filter->blockLength); } static inline size_t xor8_serialization_bytes(const xor8_t *filter) { return sizeof(filter->seed) + sizeof(filter->blockLength) + - sizeof(uint8_t) * 3 * filter->blockLength; + sizeof(uint8_t) * 3 * (size_t)(filter->blockLength); } // serialize a filter to a buffer, the buffer should have a capacity of at least @@ -1298,7 +1298,7 @@ static inline void xor16_serialize(const xor16_t *filter, char *buffer) { buffer += sizeof(filter->seed); memcpy(buffer, &filter->blockLength, sizeof(filter->blockLength)); buffer += sizeof(filter->blockLength); - memcpy(buffer, filter->fingerprints, filter->blockLength * 3 * sizeof(uint16_t)); + memcpy(buffer, filter->fingerprints, (size_t)(filter->blockLength) * 3 * sizeof(uint16_t)); } // serialize a filter to a buffer, the buffer should have a capacity of at least @@ -1309,7 +1309,7 @@ static inline void xor8_serialize(const xor8_t *filter, char *buffer) { buffer += sizeof(filter->seed); memcpy(buffer, &filter->blockLength, sizeof(filter->blockLength)); buffer += sizeof(filter->blockLength); - memcpy(buffer, filter->fingerprints, filter->blockLength * 3 * sizeof(uint8_t)); + memcpy(buffer, filter->fingerprints, (size_t)(filter->blockLength) * 3 * sizeof(uint8_t)); } // deserialize a 
filter from a buffer, returns true on success, false on failure. @@ -1322,11 +1322,11 @@ static inline bool xor16_deserialize(xor16_t * filter, const char *buffer) { buffer += sizeof(filter->seed); memcpy(&filter->blockLength, buffer, sizeof(filter->blockLength)); buffer += sizeof(filter->blockLength); - filter->fingerprints = (uint16_t*)malloc(filter->blockLength * 3 * sizeof(uint16_t)); + filter->fingerprints = (uint16_t*)malloc((size_t)(filter->blockLength) * 3 * sizeof(uint16_t)); if(filter->fingerprints == NULL) { return false; } - memcpy(filter->fingerprints, buffer, filter->blockLength * 3 * sizeof(uint16_t)); + memcpy(filter->fingerprints, buffer, (size_t)(filter->blockLength) * 3 * sizeof(uint16_t)); return true; } @@ -1341,11 +1341,11 @@ static inline bool xor8_deserialize(xor8_t * filter, const char *buffer) { buffer += sizeof(filter->seed); memcpy(&filter->blockLength, buffer, sizeof(filter->blockLength)); buffer += sizeof(filter->blockLength); - filter->fingerprints = (uint8_t*)malloc(filter->blockLength * 3 * sizeof(uint8_t)); + filter->fingerprints = (uint8_t*)malloc((size_t)(filter->blockLength) * 3 * sizeof(uint8_t)); if(filter->fingerprints == NULL) { return false; } - memcpy(filter->fingerprints, buffer, filter->blockLength * 3 * sizeof(uint8_t)); + memcpy(filter->fingerprints, buffer, (size_t)(filter->blockLength) * 3 * sizeof(uint8_t)); return true; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e191a10..e23be17 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,3 +1,24 @@ add_executable(unit unit.c) add_test(unit unit) -target_link_libraries(unit PUBLIC xor_singleheader) +target_link_libraries(unit PRIVATE xor_singleheader) + + +# full warnings with sanitizers for tests. Include debug symbols and +# only -O2 to maintain some debuggability. 
-Werror to +# prevent new warnings creeping in. Matches Makefile +if (MSVC) + # limited support for MSVC, this is not tested + list(APPEND TEST_COMPILE_OPTIONS /W4 /fsanitize=address) +else() # *nix + list(APPEND TEST_COMPILE_OPTIONS -g -O2 + -Wall -Wextra -Wshadow -Wcast-qual -Wconversion -Wsign-conversion -Werror) + + if (NOT MINGW) # sanitizers are not supported under mingw + list(APPEND TEST_COMPILE_OPTIONS -fsanitize=address,undefined,leak) + # sanitizers need to be specified at link time as well + target_link_options(unit PRIVATE -fsanitize=address,leak,undefined) + endif() +endif() + +target_compile_options(unit PRIVATE ${TEST_COMPILE_OPTIONS}) + diff --git a/tests/unit.c b/tests/unit.c index 7a5673e..f6dbe00 100644 --- a/tests/unit.c +++ b/tests/unit.c @@ -2,33 +2,41 @@ #include "xorfilter.h" #include -// generic proxy for filter, important that this is a struct, not void -// as § 6.2.5..28: "All pointers to structure types shall have the -// same representation and alignment requirements as each other" -typedef struct { int dummy_; } gen_filter; - -typedef bool (*allocate_fpt)(uint32_t size, gen_filter *filter); -typedef void (*free_fpt)(gen_filter *filter); -typedef size_t (*size_in_bytes_fpt)(const gen_filter *filter); -typedef size_t (*serialization_bytes_fpt)(gen_filter *filter); -typedef void (*serialize_fpt)(gen_filter *filter, char *buffer); -typedef bool (*deserialize_fpt)(gen_filter *filter, const char *buffer); -typedef bool (*populate_fpt)(uint64_t *keys, uint32_t size, gen_filter *filter); -typedef bool (*contain_fpt)(uint64_t key, const gen_filter *filter); - -typedef void (*gfp)(void); // generic function pointer - -// generic test runner +#define FNAM(type, action) type##_##action +#define GFNAM(type, action) type##_##action##_gen + +#define F1(t, a, rt, t1, p1) rt GFNAM(t, a)(t1 p1) { return FNAM(t, a)(p1); } +#define F2(t, a, rt, t1, p1, t2, p2) rt GFNAM(t, a)(t1 p1, t2 p2) { return FNAM(t, a)(p1, p2); } +#define F3(t, a, rt, t1, p1, t2, 
p2, t3, p3) rt GFNAM(t, a)(t1 p1, t2 p2, t3 p3) { return FNAM(t, a)(p1, p2, p3); } + +#define GEN_THUNKS(ftype) \ + F2(ftype, allocate, bool, uint32_t, size, void*, filter) \ + F1(ftype, free, void, void*, filter) \ + F1(ftype, size_in_bytes, size_t, const void*, filter) \ + F1(ftype, serialization_bytes, size_t, void*, filter) \ + F2(ftype, serialize, void, void*, filter, char*, buffer) \ + F2(ftype, deserialize, bool, void*, filter, const char*, buffer) \ + F3(ftype, populate, bool, uint64_t*, keys, uint32_t, size, void*, filter) \ + F2(ftype, contain, bool, uint64_t, key, const void*, filter) + +GEN_THUNKS(xor8) +GEN_THUNKS(xor16) +GEN_THUNKS(binary_fuse8) +GEN_THUNKS(binary_fuse16) + +F3(xor8, buffered_populate, bool, uint64_t*, keys, uint32_t, size, void*, filter) +F3(xor16, buffered_populate, bool, uint64_t*, keys, uint32_t, size, void*, filter) + bool test(size_t size, size_t repeated_size, void *filter, - gfp allocate, - gfp free_filter, - gfp size_in_bytes, - gfp serialization_bytes, - gfp serialize, - gfp deserialize, - gfp populate, - gfp contain) { - ((allocate_fpt)allocate)((uint32_t)size, filter); + bool(*allocate)(uint32_t size, void *filter), + void (*free_filter)(void *filter), + size_t (*size_in_bytes)(const void *filter), + size_t (*serialization_bytes)(void *filter), + void (*serialize)(void *filter, char *buffer), + bool (*deserialize)(void *filter, const char *buffer), + bool (*populate)(uint64_t *keys, uint32_t size, void *filter), + bool (*contain)(uint64_t key, const void *filter)) { + allocate((uint32_t)size, filter); // we need some set of values uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); for (size_t i = 0; i < size - repeated_size; i++) { @@ -38,22 +46,22 @@ bool test(size_t size, size_t repeated_size, void *filter, big_set[size - i - 1] = i; // we use contiguous values } // we construct the filter - if(!((populate_fpt)populate)(big_set, (uint32_t)size, filter)) { return false; } + if(!populate(big_set, 
(uint32_t)size, filter)) { return false; } for (size_t i = 0; i < size; i++) { - if (!((contain_fpt)contain)(big_set[i], filter)) { + if (!contain(big_set[i], filter)) { printf("bug!\n"); return false; } } - size_t buffer_size = ((serialization_bytes_fpt)serialization_bytes)(filter); + size_t buffer_size = serialization_bytes(filter); char *buffer = (char*)malloc(buffer_size); - ((serialize_fpt)serialize)(filter, buffer); - ((free_fpt)free_filter)(filter); - ((deserialize_fpt)deserialize)(filter, buffer); + serialize(filter, buffer); + free_filter(filter); + deserialize(filter, buffer); free(buffer); for (size_t i = 0; i < size; i++) { - if (!((contain_fpt)contain)(big_set[i], filter)) { + if (!(contain)(big_set[i], filter)) { printf("bug!\n"); return false; } @@ -63,7 +71,7 @@ bool test(size_t size, size_t repeated_size, void *filter, size_t trials = 10000000; for (size_t i = 0; i < trials; i++) { uint64_t random_key = ((uint64_t)rand() << 32U) + (uint64_t)rand(); - if (((contain_fpt)contain)(random_key, filter)) { + if (contain(random_key, filter)) { if (random_key >= size) { random_matches++; } @@ -71,11 +79,11 @@ bool test(size_t size, size_t repeated_size, void *filter, } double fpp = (double)random_matches * 1.0 / (double)trials; printf(" fpp %3.5f (estimated) \n", fpp); - double bpe = (double)((size_in_bytes_fpt)size_in_bytes)(filter) * 8.0 / (double)size; + double bpe = (double)size_in_bytes(filter) * 8.0 / (double)size; printf(" bits per entry %3.2f\n", bpe); printf(" bits per entry %3.2f (theoretical lower bound)\n", - log(fpp)/log(2)); printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2))); - ((free_fpt)free_filter)(filter); + free_filter(filter); free(big_set); return true; } @@ -84,14 +92,14 @@ bool testbufferedxor8(size_t size) { printf("testing buffered xor8\n"); xor8_t filter; return test(size, 0, &filter, - (gfp)xor8_allocate, - (gfp)xor8_free, - (gfp)xor8_size_in_bytes, - (gfp)xor8_serialization_bytes, - (gfp)xor8_serialize, - 
(gfp)xor8_deserialize, - (gfp)xor8_buffered_populate, - (gfp)xor8_contain); + xor8_allocate_gen, + xor8_free_gen, + xor8_size_in_bytes_gen, + xor8_serialization_bytes_gen, + xor8_serialize_gen, + xor8_deserialize_gen, + xor8_buffered_populate_gen, + xor8_contain_gen); } @@ -99,28 +107,28 @@ bool testxor8(size_t size) { printf("testing xor8\n"); xor8_t filter; return test(size, 0, &filter, - (gfp)xor8_allocate, - (gfp)xor8_free, - (gfp)xor8_size_in_bytes, - (gfp)xor8_serialization_bytes, - (gfp)xor8_serialize, - (gfp)xor8_deserialize, - (gfp)xor8_populate, - (gfp)xor8_contain); + xor8_allocate_gen, + xor8_free_gen, + xor8_size_in_bytes_gen, + xor8_serialization_bytes_gen, + xor8_serialize_gen, + xor8_deserialize_gen, + xor8_populate_gen, + xor8_contain_gen); } bool testxor16(size_t size) { printf("testing xor16\n"); xor16_t filter; return test(size, 0, &filter, - (gfp)xor16_allocate, - (gfp)xor16_free, - (gfp)xor16_size_in_bytes, - (gfp)xor16_serialization_bytes, - (gfp)xor16_serialize, - (gfp)xor16_deserialize, - (gfp)xor16_populate, - (gfp)xor16_contain); + xor16_allocate_gen, + xor16_free_gen, + xor16_size_in_bytes_gen, + xor16_serialization_bytes_gen, + xor16_serialize_gen, + xor16_deserialize_gen, + xor16_populate_gen, + xor16_contain_gen); } @@ -129,28 +137,28 @@ bool testbufferedxor16(size_t size) { printf("testing buffered xor16\n"); xor16_t filter; return test(size, 0, &filter, - (gfp)xor16_allocate, - (gfp)xor16_free, - (gfp)xor16_size_in_bytes, - (gfp)xor16_serialization_bytes, - (gfp)xor16_serialize, - (gfp)xor16_deserialize, - (gfp)xor16_buffered_populate, - (gfp)xor16_contain); + xor16_allocate_gen, + xor16_free_gen, + xor16_size_in_bytes_gen, + xor16_serialization_bytes_gen, + xor16_serialize_gen, + xor16_deserialize_gen, + xor16_buffered_populate_gen, + xor16_contain_gen); } bool testbinaryfuse8(size_t size, size_t repeated_size) { printf("testing binary fuse8 with size %zu and %zu duplicates\n", size, repeated_size); binary_fuse8_t filter; return 
test(size, repeated_size, &filter, - (gfp)binary_fuse8_allocate, - (gfp)binary_fuse8_free, - (gfp)binary_fuse8_size_in_bytes, - (gfp)binary_fuse8_serialization_bytes, - (gfp)binary_fuse8_serialize, - (gfp)binary_fuse8_deserialize, - (gfp)binary_fuse8_populate, - (gfp)binary_fuse8_contain); + binary_fuse8_allocate_gen, + binary_fuse8_free_gen, + binary_fuse8_size_in_bytes_gen, + binary_fuse8_serialization_bytes_gen, + binary_fuse8_serialize_gen, + binary_fuse8_deserialize_gen, + binary_fuse8_populate_gen, + binary_fuse8_contain_gen); } @@ -159,14 +167,14 @@ bool testbinaryfuse16(size_t size, size_t repeated_size) { printf("testing binary fuse16 with size %zu and %zu duplicates\n", size, repeated_size); binary_fuse16_t filter; return test(size, repeated_size, &filter, - (gfp)binary_fuse16_allocate, - (gfp)binary_fuse16_free, - (gfp)binary_fuse16_size_in_bytes, - (gfp)binary_fuse16_serialization_bytes, - (gfp)binary_fuse16_serialize, - (gfp)binary_fuse16_deserialize, - (gfp)binary_fuse16_populate, - (gfp)binary_fuse16_contain); + binary_fuse16_allocate_gen, + binary_fuse16_free_gen, + binary_fuse16_size_in_bytes_gen, + binary_fuse16_serialization_bytes_gen, + binary_fuse16_serialize_gen, + binary_fuse16_deserialize_gen, + binary_fuse16_populate_gen, + binary_fuse16_contain_gen); } void failure_rate_binary_fuse16() {