From da6054d8a375faf4898e51c63881656b6ef6ff68 Mon Sep 17 00:00:00 2001 From: WillisMedwell Date: Fri, 9 Feb 2024 13:09:16 +1100 Subject: [PATCH 1/3] Added: Macro SZ_NULL_CHAR, Clang-CL intrinsics. Despite declaring it as extern "C", MSVC and Clang-Cl both still enforce that you cannot convert a (void *)0 (r-value) to a char * for some reason. As such, SZ_NULL_CHAR had to be added. Clang-Cl also picks and chooses when it wants to use MSVC intrinsics over Clang intrinsics, so resolution for that was added. Also, the build script was just adding PIC even though it might not be available. NOTE: None of this code is tested, as both Clang-Cl and MSVC are unable to build any of the tests on my machine despite best efforts. --- CMakeLists.txt | 12 +++-- include/stringzilla/stringzilla.h | 77 ++++++++++++++++++------------- 2 files changed, 54 insertions(+), 35 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8af33f09..5af8ee17 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,8 @@ project( HOMEPAGE_URL "https://github.com/ashvardanian/stringzilla") set(CMAKE_C_STANDARD 99) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 17) # This gives many issues for msvc and clang-cl, especially if later on you set it to std-c++11 later on in the tests... 
+ set(CMAKE_C_EXTENSIONS OFF) set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_COMPILE_WARNING_AS_ERROR) @@ -144,9 +145,12 @@ function(set_compiler_flags target cpp_standard target_arch) "$<$,$,$>>:/Zi>" ) - # Enable Position Independent Code - target_compile_options(${target} PRIVATE "$<$:-fPIC>") - target_link_options(${target} PRIVATE "$<$:-fPIC>") + # If available, enable Position Independent Code + if(CMAKE_POSITION_INDEPENDENT_CODE) + target_compile_options(${target} PRIVATE "$<$:-fPIC>") + target_link_options(${target} PRIVATE "$<$:-fPIC>") + endif() + # Check for ${target_arch} and set it or use "march=native" if not defined if("${target_arch}" STREQUAL "") diff --git a/include/stringzilla/stringzilla.h b/include/stringzilla/stringzilla.h index 283b0ca7..6f9df0c7 100644 --- a/include/stringzilla/stringzilla.h +++ b/include/stringzilla/stringzilla.h @@ -1093,6 +1093,7 @@ SZ_PUBLIC sz_cptr_t sz_rfind_charset_neon(sz_cptr_t text, sz_size_t length, sz_c #define SZ_NULL __null #else #define SZ_NULL ((void *)0) +#define SZ_NULL_CHAR ((char*)0) #endif /** @@ -1121,17 +1122,31 @@ SZ_PUBLIC sz_cptr_t sz_rfind_charset_neon(sz_cptr_t text, sz_size_t length, sz_c #endif /* - * Intrinsics aliases for MSVC, GCC, and Clang. + * Intrinsics aliases for MSVC, GCC, and Clang. 
(and Clang-Cl) */ +#if defined(_MSC_VER) && defined(__clang__) +#include +SZ_INTERNAL sz_size_t sz_u64_clz(sz_u64_t x) { return __tzcnt_u64(x); } +SZ_INTERNAL int sz_u32_ctz(sz_u32_t x) { return __tzcnt_u32(x); } +SZ_INTERNAL int sz_u32_clz(sz_u32_t x) { return __lzcnt(x); } +#elif defined(_MSC_VER) +#include +SZ_INTERNAL sz_size_t sz_u64_clz(sz_u64_t x) { return _lzcnt_u64(x); } +SZ_INTERNAL int sz_u32_ctz(sz_u32_t x) { return _tzcnt_u32(x); } +SZ_INTERNAL int sz_u32_clz(sz_u32_t x) { return _lzcnt_u32(x); } +#endif + #if defined(_MSC_VER) + #include + SZ_INTERNAL sz_size_t sz_u64_popcount(sz_u64_t x) { return __popcnt64(x); } SZ_INTERNAL sz_size_t sz_u64_ctz(sz_u64_t x) { return _tzcnt_u64(x); } -SZ_INTERNAL sz_size_t sz_u64_clz(sz_u64_t x) { return _lzcnt_u64(x); } + SZ_INTERNAL sz_u64_t sz_u64_bytes_reverse(sz_u64_t val) { return _byteswap_uint64(val); } SZ_INTERNAL int sz_u32_popcount(sz_u32_t x) { return __popcnt(x); } -SZ_INTERNAL int sz_u32_ctz(sz_u32_t x) { return _tzcnt_u32(x); } -SZ_INTERNAL int sz_u32_clz(sz_u32_t x) { return _lzcnt_u32(x); } + + SZ_INTERNAL sz_u32_t sz_u32_bytes_reverse(sz_u32_t val) { return _byteswap_ulong(val); } #else SZ_INTERNAL int sz_u64_popcount(sz_u64_t x) { return __builtin_popcountll(x); } @@ -1282,7 +1297,7 @@ SZ_INTERNAL sz_u16_vec_t sz_u16_load(sz_cptr_t ptr) { result.u8s[0] = ptr[0]; result.u8s[1] = ptr[1]; return result; -#elif defined(_MSC_VER) +#elif defined(_MSC_VER) && !defined(__clang__) return *((__unaligned sz_u16_vec_t *)ptr); #else __attribute__((aligned(1))) sz_u16_vec_t const *result = (sz_u16_vec_t const *)ptr; @@ -1311,7 +1326,7 @@ SZ_INTERNAL sz_u32_vec_t sz_u32_load(sz_cptr_t ptr) { result.u8s[2] = ptr[2]; result.u8s[3] = ptr[3]; return result; -#elif defined(_MSC_VER) +#elif defined(_MSC_VER) && !defined(__clang__) return *((__unaligned sz_u32_vec_t *)ptr); #else __attribute__((aligned(1))) sz_u32_vec_t const *result = (sz_u32_vec_t const *)ptr; @@ -1345,7 +1360,7 @@ SZ_INTERNAL sz_u64_vec_t 
sz_u64_load(sz_cptr_t ptr) { result.u8s[6] = ptr[6]; result.u8s[7] = ptr[7]; return result; -#elif defined(_MSC_VER) +#elif defined(_MSC_VER) && !defined(__clang__) return *((__unaligned sz_u64_vec_t *)ptr); #else __attribute__((aligned(1))) sz_u64_vec_t const *result = (sz_u64_vec_t const *)ptr; @@ -1358,7 +1373,7 @@ SZ_INTERNAL sz_ptr_t _sz_memory_allocate_fixed(sz_size_t length, void *handle) { sz_size_t capacity; sz_copy((sz_ptr_t)&capacity, (sz_cptr_t)handle, sizeof(sz_size_t)); sz_size_t consumed_capacity = sizeof(sz_size_t); - if (consumed_capacity + length > capacity) return SZ_NULL; + if (consumed_capacity + length > capacity) return SZ_NULL_CHAR; return (sz_ptr_t)handle + consumed_capacity; } @@ -1438,7 +1453,7 @@ SZ_INTERNAL void _sz_locate_needle_anomalies(sz_cptr_t start, sz_size_t length, #if !SZ_AVOID_LIBC #include // `fprintf` #include // `malloc`, `EXIT_FAILURE` -#else +#elif defined(MSVC) && !defined(__clang__) extern void *malloc(size_t); extern void free(void *); #endif @@ -1471,7 +1486,7 @@ SZ_PUBLIC sz_bool_t sz_equal_serial(sz_cptr_t a, sz_cptr_t b, sz_size_t length) SZ_PUBLIC sz_cptr_t sz_find_charset_serial(sz_cptr_t text, sz_size_t length, sz_charset_t const *set) { for (sz_cptr_t const end = text + length; text != end; ++text) if (sz_charset_contains(set, *text)) return text; - return SZ_NULL; + return SZ_NULL_CHAR; } SZ_PUBLIC sz_cptr_t sz_rfind_charset_serial(sz_cptr_t text, sz_size_t length, sz_charset_t const *set) { @@ -1480,7 +1495,7 @@ SZ_PUBLIC sz_cptr_t sz_rfind_charset_serial(sz_cptr_t text, sz_size_t length, sz sz_cptr_t const end = text; for (text += length; text != end;) if (sz_charset_contains(set, *(text -= 1))) return text; - return SZ_NULL; + return SZ_NULL_CHAR; #pragma GCC diagnostic pop } @@ -1522,7 +1537,7 @@ SZ_INTERNAL sz_u64_vec_t _sz_u64_each_byte_equal(sz_u64_vec_t a, sz_u64_vec_t b) */ SZ_PUBLIC sz_cptr_t sz_find_byte_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n) { - if (!h_length) return SZ_NULL; + if 
(!h_length) return SZ_NULL_CHAR; sz_cptr_t const h_end = h + h_length; #if !SZ_USE_MISALIGNED_LOADS @@ -1545,7 +1560,7 @@ SZ_PUBLIC sz_cptr_t sz_find_byte_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr // Handle the misaligned tail. for (; h < h_end; ++h) if (*h == *n) return h; - return SZ_NULL; + return SZ_NULL_CHAR; } /** @@ -1555,7 +1570,7 @@ SZ_PUBLIC sz_cptr_t sz_find_byte_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr */ sz_cptr_t sz_rfind_byte_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n) { - if (!h_length) return SZ_NULL; + if (!h_length) return SZ_NULL_CHAR; sz_cptr_t const h_start = h; // Reposition the `h` pointer to the end, as we will be walking backwards. @@ -1579,7 +1594,7 @@ sz_cptr_t sz_rfind_byte_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n) { for (; h >= h_start; --h) if (*h == *n) return h; - return SZ_NULL; + return SZ_NULL_CHAR; } /** @@ -1633,7 +1648,7 @@ SZ_INTERNAL sz_cptr_t _sz_find_2byte_serial(sz_cptr_t h, sz_size_t h_length, sz_ for (; h + 2 <= h_end; ++h) if ((h[0] == n[0]) + (h[1] == n[1]) == 2) return h; - return SZ_NULL; + return SZ_NULL_CHAR; } /** @@ -1697,7 +1712,7 @@ SZ_INTERNAL sz_cptr_t _sz_find_4byte_serial(sz_cptr_t h, sz_size_t h_length, sz_ for (; h + 4 <= h_end; ++h) if ((h[0] == n[0]) + (h[1] == n[1]) + (h[2] == n[2]) + (h[3] == n[3]) == 4) return h; - return SZ_NULL; + return SZ_NULL_CHAR; } /** @@ -1768,7 +1783,7 @@ SZ_INTERNAL sz_cptr_t _sz_find_3byte_serial(sz_cptr_t h, sz_size_t h_length, sz_ for (; h + 3 <= h_end; ++h) if ((h[0] == n[0]) + (h[1] == n[1]) + (h[2] == n[2]) == 3) return h; - return SZ_NULL; + return SZ_NULL_CHAR; } /** @@ -1821,7 +1836,7 @@ SZ_INTERNAL sz_cptr_t _sz_find_horspool_upto_256bytes_serial(sz_cptr_t h_chars, if (h_vec.u32 == n_vec.u32 && sz_equal((sz_cptr_t)h + i, n_chars, n_length)) return (sz_cptr_t)h + i; i += bad_shift_table.jumps[h[i + n_length - 1]]; } - return SZ_NULL; + return SZ_NULL_CHAR; } /** @@ -1872,7 +1887,7 @@ SZ_INTERNAL sz_cptr_t 
_sz_rfind_horspool_upto_256bytes_serial(sz_cptr_t h_chars, if (h_vec.u32 == n_vec.u32 && sz_equal((sz_cptr_t)h + i, n_chars, n_length)) return (sz_cptr_t)h + i; j += bad_shift_table.jumps[h[i]]; } - return SZ_NULL; + return SZ_NULL_CHAR; } /** @@ -1885,11 +1900,11 @@ SZ_INTERNAL sz_cptr_t _sz_find_with_prefix(sz_cptr_t h, sz_size_t h_length, sz_c sz_size_t suffix_length = n_length - prefix_length; while (1) { sz_cptr_t found = find_prefix(h, h_length, n, prefix_length); - if (!found) return SZ_NULL; + if (!found) return SZ_NULL_CHAR; // Verify the remaining part of the needle sz_size_t remaining = h_length - (found - h); - if (remaining < suffix_length) return SZ_NULL; + if (remaining < suffix_length) return SZ_NULL_CHAR; if (sz_equal(found + prefix_length, n + prefix_length, suffix_length)) return found; // Adjust the position. @@ -1898,7 +1913,7 @@ SZ_INTERNAL sz_cptr_t _sz_find_with_prefix(sz_cptr_t h, sz_size_t h_length, sz_c } // Unreachable, but helps silence compiler warnings: - return SZ_NULL; + return SZ_NULL_CHAR; } /** @@ -1911,11 +1926,11 @@ SZ_INTERNAL sz_cptr_t _sz_rfind_with_suffix(sz_cptr_t h, sz_size_t h_length, sz_ sz_size_t prefix_length = n_length - suffix_length; while (1) { sz_cptr_t found = find_suffix(h, h_length, n + prefix_length, suffix_length); - if (!found) return SZ_NULL; + if (!found) return SZ_NULL_CHAR; // Verify the remaining part of the needle sz_size_t remaining = found - h; - if (remaining < prefix_length) return SZ_NULL; + if (remaining < prefix_length) return SZ_NULL_CHAR; if (sz_equal(found - prefix_length, n, prefix_length)) return found - prefix_length; // Adjust the position. 
@@ -1923,7 +1938,7 @@ SZ_INTERNAL sz_cptr_t _sz_rfind_with_suffix(sz_cptr_t h, sz_size_t h_length, sz_ } // Unreachable, but helps silence compiler warnings: - return SZ_NULL; + return SZ_NULL_CHAR; } SZ_INTERNAL sz_cptr_t _sz_find_over_4bytes_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) { @@ -1943,7 +1958,7 @@ SZ_INTERNAL sz_cptr_t _sz_rfind_horspool_over_256bytes_serial(sz_cptr_t h, sz_si SZ_PUBLIC sz_cptr_t sz_find_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) { // This almost never fires, but it's better to be safe than sorry. - if (h_length < n_length || !n_length) return SZ_NULL; + if (h_length < n_length || !n_length) return SZ_NULL_CHAR; sz_find_t backends[] = { // For very short strings brute-force SWAR makes sense. @@ -1970,7 +1985,7 @@ SZ_PUBLIC sz_cptr_t sz_find_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, SZ_PUBLIC sz_cptr_t sz_rfind_serial(sz_cptr_t h, sz_size_t h_length, sz_cptr_t n, sz_size_t n_length) { // This almost never fires, but it's better to be safe than sorry. - if (h_length < n_length || !n_length) return SZ_NULL; + if (h_length < n_length || !n_length) return SZ_NULL_CHAR; sz_find_t backends[] = { // For very short strings brute-force SWAR makes sense. @@ -2670,7 +2685,7 @@ SZ_PUBLIC sz_ptr_t sz_string_init_length(sz_string_t *string, sz_size_t length, else { // If we are not lucky, we need to allocate memory. 
string->external.start = (sz_ptr_t)allocator->allocate(space_needed, allocator->handle); - if (!string->external.start) return SZ_NULL; + if (!string->external.start) return SZ_NULL_CHAR; string->external.length = length; string->external.space = space_needed; } @@ -2694,7 +2709,7 @@ SZ_PUBLIC sz_ptr_t sz_string_reserve(sz_string_t *string, sz_size_t new_capacity sz_assert(new_space > string_space && "New space must be larger than current."); sz_ptr_t new_start = (sz_ptr_t)allocator->allocate(new_space, allocator->handle); - if (!new_start) return SZ_NULL; + if (!new_start) return SZ_NULL_CHAR; sz_copy(new_start, string_start, string_length); string->external.start = new_start; @@ -2734,7 +2749,7 @@ SZ_PUBLIC sz_ptr_t sz_string_expand(sz_string_t *string, sz_size_t offset, sz_si sz_size_t min_needed_space = sz_size_bit_ceil(offset + string_length + added_length + 1); sz_size_t new_space = sz_max_of_two(min_needed_space, next_planned_size); string_start = sz_string_reserve(string, new_space - 1, allocator); - if (!string_start) return SZ_NULL; + if (!string_start) return SZ_NULL_CHAR; // Copy into the new buffer. 
sz_move(string_start + offset + added_length, string_start + offset, string_length - offset); From c2a90a19b0d5c7fb9a7156576b19b9d73088382f Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sun, 11 Feb 2024 23:19:45 +0000 Subject: [PATCH 2/3] Fix: missing `SZ_NULL_CHAR` for GCC builds --- include/stringzilla/stringzilla.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/stringzilla/stringzilla.h b/include/stringzilla/stringzilla.h index 6f9df0c7..0456af62 100644 --- a/include/stringzilla/stringzilla.h +++ b/include/stringzilla/stringzilla.h @@ -1091,9 +1091,10 @@ SZ_PUBLIC sz_cptr_t sz_rfind_charset_neon(sz_cptr_t text, sz_size_t length, sz_c */ #ifdef __GNUG__ #define SZ_NULL __null +#define SZ_NULL_CHAR __null #else #define SZ_NULL ((void *)0) -#define SZ_NULL_CHAR ((char*)0) +#define SZ_NULL_CHAR ((char *)0) #endif /** From eafecb0ffd2a24b212e6e33646f4ae1b3fbbc493 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Tue, 13 Feb 2024 21:43:36 -0800 Subject: [PATCH 3/3] Improve: Re-group compiler intrinsics --- include/stringzilla/stringzilla.h | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/include/stringzilla/stringzilla.h b/include/stringzilla/stringzilla.h index 0456af62..3c8ea578 100644 --- a/include/stringzilla/stringzilla.h +++ b/include/stringzilla/stringzilla.h @@ -1122,41 +1122,40 @@ SZ_PUBLIC sz_cptr_t sz_rfind_charset_neon(sz_cptr_t text, sz_size_t length, sz_c #define sz_assert(condition) ((void)0) #endif -/* - * Intrinsics aliases for MSVC, GCC, and Clang. (and Clang-Cl) +/* Intrinsics aliases for MSVC, GCC, Clang, and Clang-Cl. + * The following section of compiler intrinsics comes in 3 flavors. */ -#if defined(_MSC_VER) && defined(__clang__) +#if defined(_MSC_VER) && defined(__clang__) // Clang-Cl on Windows. 
#include SZ_INTERNAL sz_size_t sz_u64_clz(sz_u64_t x) { return __tzcnt_u64(x); } SZ_INTERNAL int sz_u32_ctz(sz_u32_t x) { return __tzcnt_u32(x); } SZ_INTERNAL int sz_u32_clz(sz_u32_t x) { return __lzcnt(x); } -#elif defined(_MSC_VER) +#elif defined(_MSC_VER) // Other compilers on Windows. #include SZ_INTERNAL sz_size_t sz_u64_clz(sz_u64_t x) { return _lzcnt_u64(x); } SZ_INTERNAL int sz_u32_ctz(sz_u32_t x) { return _tzcnt_u32(x); } SZ_INTERNAL int sz_u32_clz(sz_u32_t x) { return _lzcnt_u32(x); } +#else // GCC and Clang-based compilers. +SZ_INTERNAL int sz_u64_clz(sz_u64_t x) { return __builtin_clzll(x); } +SZ_INTERNAL int sz_u32_ctz(sz_u32_t x) { return __builtin_ctz(x); } // ! Undefined if `x == 0` +SZ_INTERNAL int sz_u32_clz(sz_u32_t x) { return __builtin_clz(x); } // ! Undefined if `x == 0` #endif +/* Intrinsics aliases for MSVC, GCC, Clang, and Clang-Cl. + * The following section of compiler intrinsics comes in 2 flavors. + */ #if defined(_MSC_VER) - #include - SZ_INTERNAL sz_size_t sz_u64_popcount(sz_u64_t x) { return __popcnt64(x); } SZ_INTERNAL sz_size_t sz_u64_ctz(sz_u64_t x) { return _tzcnt_u64(x); } - SZ_INTERNAL sz_u64_t sz_u64_bytes_reverse(sz_u64_t val) { return _byteswap_uint64(val); } SZ_INTERNAL int sz_u32_popcount(sz_u32_t x) { return __popcnt(x); } - - SZ_INTERNAL sz_u32_t sz_u32_bytes_reverse(sz_u32_t val) { return _byteswap_ulong(val); } #else SZ_INTERNAL int sz_u64_popcount(sz_u64_t x) { return __builtin_popcountll(x); } SZ_INTERNAL int sz_u64_ctz(sz_u64_t x) { return __builtin_ctzll(x); } -SZ_INTERNAL int sz_u64_clz(sz_u64_t x) { return __builtin_clzll(x); } SZ_INTERNAL sz_u64_t sz_u64_bytes_reverse(sz_u64_t val) { return __builtin_bswap64(val); } SZ_INTERNAL int sz_u32_popcount(sz_u32_t x) { return __builtin_popcount(x); } -SZ_INTERNAL int sz_u32_ctz(sz_u32_t x) { return __builtin_ctz(x); } // ! Undefined if `x == 0` -SZ_INTERNAL int sz_u32_clz(sz_u32_t x) { return __builtin_clz(x); } // ! 
Undefined if `x == 0` SZ_INTERNAL sz_u32_t sz_u32_bytes_reverse(sz_u32_t val) { return __builtin_bswap32(val); } #endif