diff --git a/deps/simdutf/simdutf.cpp b/deps/simdutf/simdutf.cpp index e469d44466c56b..9597a3727cd11f 100644 --- a/deps/simdutf/simdutf.cpp +++ b/deps/simdutf/simdutf.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2024-07-11 00:01:58 -0400. Do not edit! */ +/* auto-generated on 2024-07-31 17:22:10 -0400. Do not edit! */ /* begin file src/simdutf.cpp */ #include "simdutf.h" // We include base64_tables once. @@ -2140,7 +2140,7 @@ SIMDUTF_POP_DISABLE_WARNINGS #endif -// Default Haswell to on if this is x86-64. Even if we're not compiled for it, it could be selected +// Default Haswell to on if this is x86-64. Even if we are not compiled for it, it could be selected // at runtime. #ifndef SIMDUTF_IMPLEMENTATION_HASWELL // @@ -5695,6 +5695,10 @@ result base64_tail_decode(char *dst, const char_type *src, size_t length, base64 // This functions assumes that the padding (=) has been removed. template result base64_tail_decode_safe(char *dst, size_t& outlen, const char_type *src, size_t length, base64_options options) { + if(length == 0) { + outlen = 0; + return {SUCCESS, 0}; + } // This looks like 5 branches, but we expect the compiler to resolve this to a single branch: const uint8_t *to_base64 = (options & base64_url) ? tables::base64::to_base64_url_value : tables::base64::to_base64_value; const uint32_t *d0 = (options & base64_url) ? tables::base64::base64_url::d0 : tables::base64::base64_default::d0; @@ -5717,7 +5721,7 @@ result base64_tail_decode_safe(char *dst, size_t& outlen, const char_type *src, if(match_system(endianness::BIG)) { x = scalar::utf32::swap_bytes(x); } - if(dst + 3 > dstend) { + if(dstend - dst < 3) { outlen = size_t(dst - dstinit); return {OUTPUT_BUFFER_TOO_SMALL, size_t(src - srcinit)}; } @@ -5785,7 +5789,7 @@ result base64_tail_decode_safe(char *dst, size_t& outlen, const char_type *src, outlen = size_t(dst - dstinit); return {SUCCESS, size_t(dst - dstinit)}; } - if(dst + 3 >= dstend) { + if(dstend - dst <= 3) { outlen = size_t(dst - dstinit); return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit)}; } @@ -5937,7 +5941,7 @@ namespace internal { + SIMDUTF_IMPLEMENTATION_ARM64 + SIMDUTF_IMPLEMENTATION_PPC64 \ + SIMDUTF_IMPLEMENTATION_FALLBACK == 1) -// Static array of known implementations. We're hoping these get baked into the executable +// Static array of known implementations. We are hoping these get baked into the executable // without requiring a static initializer. @@ -13729,7 +13733,7 @@ inline size_t convert(const char* buf, size_t len, char* latin_output) { // range check - uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); // assembles the Unicode code point from the two bytes. It does this by discarding the leading 110 and 10 bits from the two bytes, shifting the remaining bits of the first byte, and then combining the results with a bitwise OR operation. if (code_point < 0x80 || 0xFF < code_point) { - return 0; // We only care about the range 129-255 which is Non-ASCII latin1 characters. A code_point beneath 0x80 is invalid as it's already covered by bytes whose leading bit is zero. + return 0; // We only care about the range 129-255 which is Non-ASCII latin1 characters. A code_point beneath 0x80 is invalid as it is already covered by bytes whose leading bit is zero. } *latin_output++ = char(code_point); pos += 2; @@ -13863,6 +13867,7 @@ namespace utf16_to_latin1 { template inline size_t convert(const char16_t* buf, size_t len, char* latin_output) { + if(len == 0) { return 0; } const uint16_t *data = reinterpret_cast(buf); size_t pos = 0; std::vector temp_output(len); @@ -13886,6 +13891,7 @@ inline size_t convert(const char16_t* buf, size_t len, char* latin_output) { template inline result convert_with_errors(const char16_t* buf, size_t len, char* latin_output) { + if(len == 0) { return result(error_code::SUCCESS,0); } const uint16_t *data = reinterpret_cast(buf); size_t pos = 0; char* start{latin_output}; @@ -15345,7 +15351,7 @@ std::pair arm_convert_utf16_to_latin1_with_errors(const char16_t* Ad 1. When values are less than 0x0800, it means that a 16-bit code unit - can be converted into: 1) single UTF8 byte (when it's an ASCII + can be converted into: 1) single UTF8 byte (when it is an ASCII char) or 2) two UTF8 bytes. For this case we do only some shuffle to obtain these 2-byte @@ -15897,7 +15903,7 @@ std::pair arm_convert_utf16_to_utf8_with_errors(const char16_t* b Ad 1. When values are less than 0x0800, it means that a 16-bit code unit - can be converted into: 1) single UTF8 byte (when it's an ASCII + can be converted into: 1) single UTF8 byte (when it is an ASCII char) or 2) two UTF8 bytes. For this case we do only some shuffle to obtain these 2-byte @@ -17280,7 +17286,7 @@ simdutf_really_inline const uint8_t *buf_block_reader::full_block() c template simdutf_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it is more efficient to write out 8 or 16 bytes at once. std::memcpy(dst, buf + idx, len - idx); return len - idx; } @@ -17403,7 +17409,7 @@ using namespace simd; // // Return nonzero if there are incomplete multibyte characters at the end of the block: - // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // e.g. if there is a 4-byte character, but it is 3 bytes from the end. // simdutf_really_inline simd8 is_incomplete(const simd8 input) { // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): @@ -20978,7 +20984,7 @@ simdutf_really_inline __m512i check_special_cases(__m512i input, const __m512i p } // // Return nonzero if there are incomplete multibyte characters at the end of the block: - // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // e.g. if there is a 4-byte character, but it is 3 bytes from the end. // simdutf_really_inline __m512i is_incomplete(const __m512i input) { // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): @@ -21742,7 +21748,7 @@ size_t utf16_to_utf8_avx512i(const char16_t *inbuf, size_t inlen, const uint32_t lonohi = l & ~(h + h + carry); const uint32_t hinolo = h & ~(l >> 1); inlen = _tzcnt_u32(hinolo | lonohi); - inmask = __mmask32(0x7fffffff & ((1 << inlen) - 1)); + inmask = __mmask32(0x7fffffff & ((1U << inlen) - 1)); in = _mm512_maskz_mov_epi16(inmask, in); adjust = (int)inlen - 31; inlen = 0; @@ -21808,7 +21814,7 @@ size_t utf16_to_utf8_avx512i(const char16_t *inbuf, size_t inlen, tail: if (inlen != 0) { // We must have inlen < 31. - inmask = _cvtu32_mask32((1 << inlen) - 1); + inmask = _cvtu32_mask32((1U << inlen) - 1); in = _mm512_maskz_loadu_epi16(inmask, inbuf); if(big_endian) { in = _mm512_shuffle_epi8(in, byteflip); } adjust = inlen - 31; @@ -23426,7 +23432,7 @@ simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, s } } if(buf < end) { - __m512i in = _mm512_maskz_loadu_epi16((1<<(end-buf))-1,(__m512i*)buf); + __m512i in = _mm512_maskz_loadu_epi16((1U<<(end-buf))-1,(__m512i*)buf); __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); if(surrogates) { @@ -23475,7 +23481,7 @@ simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, s } } if(buf < end) { - __m512i in = _mm512_shuffle_epi8(_mm512_maskz_loadu_epi16((1<<(end-buf))-1,(__m512i*)buf), byteflip); + __m512i in = _mm512_shuffle_epi8(_mm512_maskz_loadu_epi16((1U<<(end-buf))-1,(__m512i*)buf), byteflip); __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); if(surrogates) { @@ -23517,7 +23523,7 @@ simdutf_warn_unused result implementation::validate_utf16le_with_errors(const ch } } if(buf < end) { - __m512i in = _mm512_maskz_loadu_epi16((1<<(end-buf))-1,(__m512i*)buf); + __m512i in = _mm512_maskz_loadu_epi16((1U<<(end-buf))-1,(__m512i*)buf); __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); if(surrogates) { @@ -23571,7 +23577,7 @@ simdutf_warn_unused result implementation::validate_utf16be_with_errors(const ch } } if(buf < end) { - __m512i in = _mm512_shuffle_epi8(_mm512_maskz_loadu_epi16((1<<(end-buf))-1,(__m512i*)buf), byteflip); + __m512i in = _mm512_shuffle_epi8(_mm512_maskz_loadu_epi16((1U<<(end-buf))-1,(__m512i*)buf), byteflip); __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); if(surrogates) { @@ -23598,43 +23604,52 @@ simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, siz } simdutf_warn_unused result implementation::validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept { + const char32_t* end = len >= 16 ? buf + len - 16 : nullptr; + const char32_t* buf_orig = buf; + while (buf <= end) { + __m512i utf32 = _mm512_loadu_si512((const __m512i*)buf); + __mmask16 outside_range = _mm512_cmp_epu32_mask(utf32, _mm512_set1_epi32(0x10ffff), + _MM_CMPINT_GT); - const char32_t* end = len >= 16 ? buf + len - 16 : nullptr; - const char32_t* buf_orig = buf; - while (buf <= end) { - __m512i utf32 = _mm512_loadu_si512((const __m512i*)buf); - __mmask16 outside_range = _mm512_cmp_epu32_mask(utf32, _mm512_set1_epi32(0x10ffff), - _MM_CMPINT_GT); - if (outside_range) { - return result(error_code::TOO_LARGE, buf - buf_orig + _tzcnt_u32(outside_range)); - } + __m512i utf32_off = _mm512_add_epi32(utf32, _mm512_set1_epi32(0xffff2000)); - __m512i utf32_off = _mm512_add_epi32(utf32, _mm512_set1_epi32(0xffff2000)); + __mmask16 surrogate_range = _mm512_cmp_epu32_mask(utf32_off, _mm512_set1_epi32(0xfffff7ff), + _MM_CMPINT_GT); + if((outside_range | surrogate_range)) { + auto outside_idx = _tzcnt_u32(outside_range); + auto surrogate_idx = _tzcnt_u32(surrogate_range); - __mmask16 surrogate_range = _mm512_cmp_epu32_mask(utf32_off, _mm512_set1_epi32(0xfffff7ff), - _MM_CMPINT_GT); - if (surrogate_range) { - return result(error_code::SURROGATE, buf - buf_orig + _tzcnt_u32(surrogate_range)); + if (outside_idx < surrogate_idx) { + return result(error_code::TOO_LARGE, buf - buf_orig + outside_idx); } - buf += 16; + + return result(error_code::SURROGATE, buf - buf_orig + surrogate_idx); } - if(buf < buf_orig + len) { - __m512i utf32 = _mm512_maskz_loadu_epi32(__mmask16((1<<(buf_orig + len - buf))-1),(const __m512i*)buf); - __mmask16 outside_range = _mm512_cmp_epu32_mask(utf32, _mm512_set1_epi32(0x10ffff), - _MM_CMPINT_GT); - if (outside_range) { - return result(error_code::TOO_LARGE, buf - buf_orig + _tzcnt_u32(outside_range)); - } - __m512i utf32_off = _mm512_add_epi32(utf32, _mm512_set1_epi32(0xffff2000)); - __mmask16 surrogate_range = _mm512_cmp_epu32_mask(utf32_off, _mm512_set1_epi32(0xfffff7ff), - _MM_CMPINT_GT); - if (surrogate_range) { - return result(error_code::SURROGATE, buf - buf_orig + _tzcnt_u32(surrogate_range)); + buf += 16; + } + if(buf < buf_orig + len) { + __m512i utf32 = _mm512_maskz_loadu_epi32(__mmask16((1U<<(buf_orig + len - buf))-1),(const __m512i*)buf); + __mmask16 outside_range = _mm512_cmp_epu32_mask(utf32, _mm512_set1_epi32(0x10ffff), + _MM_CMPINT_GT); + __m512i utf32_off = _mm512_add_epi32(utf32, _mm512_set1_epi32(0xffff2000)); + + __mmask16 surrogate_range = _mm512_cmp_epu32_mask(utf32_off, _mm512_set1_epi32(0xfffff7ff), + _MM_CMPINT_GT); + if((outside_range | surrogate_range)) { + auto outside_idx = _tzcnt_u32(outside_range); + auto surrogate_idx = _tzcnt_u32(surrogate_range); + + if (outside_idx < surrogate_idx) { + return result(error_code::TOO_LARGE, buf - buf_orig + outside_idx); } + + return result(error_code::SURROGATE, buf - buf_orig + surrogate_idx); } - return result(error_code::SUCCESS, len); + } + + return result(error_code::SUCCESS, len); } simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept { @@ -24176,7 +24191,7 @@ void implementation::change_endianness_utf16(const char16_t * input, size_t leng pos += 32; } if(pos < length) { - __mmask32 m((1<< (length - pos))-1); + __mmask32 m((1U<< (length - pos))-1); __m512i utf16 = _mm512_maskz_loadu_epi16(m, (const __m512i*)(input + pos)); utf16 = _mm512_shuffle_epi8(utf16, byteflip); _mm512_mask_storeu_epi16(output + pos, m, utf16); @@ -25612,7 +25627,7 @@ avx2_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, Ad 1. When values are less than 0x0800, it means that a 16-bit code unit - can be converted into: 1) single UTF8 byte (when it's an ASCII + can be converted into: 1) single UTF8 byte (when it is an ASCII char) or 2) two UTF8 bytes. For this case we do only some shuffle to obtain these 2-byte @@ -26144,7 +26159,7 @@ std::pair avx2_convert_utf16_to_utf8_with_errors(const char16_t* Ad 1. When values are less than 0x0800, it means that a 16-bit code unit - can be converted into: 1) single UTF8 byte (when it's an ASCII + can be converted into: 1) single UTF8 byte (when it is an ASCII char) or 2) two UTF8 bytes. For this case we do only some shuffle to obtain these 2-byte @@ -27040,24 +27055,9 @@ size_t convert_masked_utf8_to_latin1(const char *input, // This results in more instructions but, potentially, also higher speeds. // const __m128i in = _mm_loadu_si128((__m128i *)input); - const __m128i in_second_half = _mm_loadu_si128((__m128i *)(input + 16)); const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xfff; //we're only processing 12 bytes in case it`s not all ASCII - - if((input_utf8_end_of_code_point_mask & 0xffffffff) == 0xffffffff) { - // Load the next 128 bits. - - // Combine the two 128-bit registers into a single 256-bit register. - __m256i in_combined = _mm256_set_m128i(in_second_half, in); - - // We process the data in chunks of 32 bytes. - _mm256_storeu_si256(reinterpret_cast<__m256i *>(latin1_output), in_combined); - - latin1_output += 32; // We wrote 32 characters. - return 32; // We consumed 32 bytes. - } - + utf8_end_of_code_point_mask & 0xfff; // we are only processing 12 bytes in case it is not all ASCII if(((utf8_end_of_code_point_mask & 0xffff) == 0xffff)) { // We process the data in chunks of 16 bytes. @@ -27771,7 +27771,7 @@ simdutf_really_inline const uint8_t *buf_block_reader::full_block() c template simdutf_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it is more efficient to write out 8 or 16 bytes at once. std::memcpy(dst, buf + idx, len - idx); return len - idx; } @@ -27894,7 +27894,7 @@ using namespace simd; // // Return nonzero if there are incomplete multibyte characters at the end of the block: - // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // e.g. if there is a 4-byte character, but it is 3 bytes from the end. // simdutf_really_inline simd8 is_incomplete(const simd8 input) { // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): @@ -30170,7 +30170,7 @@ simdutf_really_inline const uint8_t *buf_block_reader::full_block() c template simdutf_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it is more efficient to write out 8 or 16 bytes at once. std::memcpy(dst, buf + idx, len - idx); return len - idx; } @@ -30293,7 +30293,7 @@ using namespace simd; // // Return nonzero if there are incomplete multibyte characters at the end of the block: - // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // e.g. if there is a 4-byte character, but it is 3 bytes from the end. // simdutf_really_inline simd8 is_incomplete(const simd8 input) { // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): @@ -33450,7 +33450,7 @@ int sse_detect_encodings(const char * buf, size_t len) { - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) - there must not be sole low surrogate nor high surrogate - We're going to build three bitmasks based on the 3rd nibble: + We are going to build three bitmasks based on the 3rd nibble: - V = valid word, - L = low surrogate (0xd800 .. 0xdbff) - H = high surrogate (0xdc00 .. 0xdfff) @@ -34167,7 +34167,7 @@ size_t convert_masked_utf8_to_latin1(const char *input, // const __m128i in = _mm_loadu_si128((__m128i *)input); const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xfff; //we're only processing 12 bytes in case it`s not all ASCII + utf8_end_of_code_point_mask & 0xfff; // we are only processing 12 bytes in case it is not all ASCII if(((utf8_end_of_code_point_mask & 0xffff) == 0xffff)) { // We process the data in chunks of 16 bytes. _mm_storeu_si128(reinterpret_cast<__m128i *>(latin1_output), in); @@ -34280,7 +34280,7 @@ std::pair sse_convert_utf16_to_latin1_with_errors(const char16_t* Ad 1. When values are less than 0x0800, it means that a 16-bit code unit - can be converted into: 1) single UTF8 byte (when it's an ASCII + can be converted into: 1) single UTF8 byte (when it is an ASCII char) or 2) two UTF8 bytes. For this case we do only some shuffle to obtain these 2-byte @@ -35009,7 +35009,7 @@ std::pair sse_convert_utf32_to_utf8(const char32_t* buf, _mm_and_si128(in, v_7fffffff), _mm_and_si128(nextin, v_7fffffff) );//in this context pack the two __m128 into a single - //By ensuring the highest bit is set to 0(&v_7fffffff), we're making sure all values are interpreted as non-negative, or specifically, the values are within the range of valid Unicode code points. + //By ensuring the highest bit is set to 0(&v_7fffffff), we are making sure all values are interpreted as non-negative, or specifically, the values are within the range of valid Unicode code points. //remember : having leading byte 0 means a positive number by the two complements system. Unicode is well beneath the range where you'll start getting issues so that's OK. // Try to apply UTF-16 => UTF-8 from ./sse_convert_utf16_to_utf8.cpp @@ -36301,7 +36301,7 @@ simdutf_really_inline const uint8_t *buf_block_reader::full_block() c template simdutf_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it is more efficient to write out 8 or 16 bytes at once. std::memcpy(dst, buf + idx, len - idx); return len - idx; } @@ -36424,7 +36424,7 @@ using namespace simd; // // Return nonzero if there are incomplete multibyte characters at the end of the block: - // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // e.g. if there is a 4-byte character, but it is 3 bytes from the end. // simdutf_really_inline simd8 is_incomplete(const simd8 input) { // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): diff --git a/deps/simdutf/simdutf.h b/deps/simdutf/simdutf.h index ee7f19ce1f960a..3b206d69bcb034 100644 --- a/deps/simdutf/simdutf.h +++ b/deps/simdutf/simdutf.h @@ -1,4 +1,4 @@ -/* auto-generated on 2024-07-11 00:01:58 -0400. Do not edit! */ +/* auto-generated on 2024-07-31 17:22:10 -0400. Do not edit! */ /* begin file include/simdutf.h */ #ifndef SIMDUTF_H #define SIMDUTF_H @@ -594,7 +594,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS #define SIMDUTF_SIMDUTF_VERSION_H /** The version of simdutf being used (major.minor.revision) */ -#define SIMDUTF_VERSION "5.3.0" +#define SIMDUTF_VERSION "5.3.1" namespace simdutf { enum { @@ -609,7 +609,7 @@ enum { /** * The revision (major.minor.REVISION) of simdutf being used. */ - SIMDUTF_VERSION_REVISION = 0 + SIMDUTF_VERSION_REVISION = 1 }; } // namespace simdutf