From 7401956e53891fdc8612034fdc881d59c66da44e Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 22 May 2025 14:45:19 +0300 Subject: [PATCH 1/4] use guard for bitset from string --- stl/src/vector_algorithms.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 9fa21444820..20c3f717074 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -7025,7 +7025,8 @@ namespace { using _Traits_2_sse = void; #else // ^^^ defined(_M_ARM64EC) / !defined(_M_ARM64EC) vvv struct _Traits_avx { - using _Vec = __m256i; + using _Guard = _Zeroupper_on_exit; + using _Vec = __m256i; static __m256i _Load(const void* _Src) noexcept { return _mm256_loadu_si256(reinterpret_cast(_Src)); @@ -7045,7 +7046,8 @@ namespace { }; struct _Traits_sse { - using _Vec = __m128i; + using _Guard = char; + using _Vec = __m128i; static __m128i _Load(const void* _Src) noexcept { return _mm_loadu_si128(reinterpret_cast(_Src)); @@ -7174,6 +7176,7 @@ namespace { template bool _Impl(void* const _Dest, const _Elem* const _Src, const size_t _Size_bytes, const size_t _Size_bits, const size_t _Size_chars, const _Elem _Elem0, const _Elem _Elem1) noexcept { + [[maybe_unused]] typename _Traits::_Guard _Guard; // TRANSITION, DevCom-10331414 const auto _Dx0 = _Traits::_Set(_Elem0); const auto _Dx1 = _Traits::_Set(_Elem1); @@ -7190,14 +7193,12 @@ namespace { // Convert characters to bits if (!_Loop<_Traits>(_Src, _Src + _Size_convert, _Dx0, _Dx1, _Out)) { - _Traits::_Exit_vectorized(); // TRANSITION, DevCom-10331414 return false; } // Verify remaining characters, if any if (_Size_convert != _Size_chars && !_Loop<_Traits>(_Src + _Size_convert, _Src + _Size_chars, _Dx0, _Dx1, [](_Traits::_Vec) {})) { - _Traits::_Exit_vectorized(); // TRANSITION, DevCom-10331414 return false; } @@ -7206,8 +7207,6 @@ namespace { memset(_Dst_words, 0, _Byte_length(_Dst_words, _Dst_words_end)); } - _Traits::_Exit_vectorized(); // TRANSITION, DevCom-10331414 - return true; } #endif // ^^^ !defined(_M_ARM64EC) ^^^ From de6a50028e51914af6777125aa9d18a2aab85e63 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 22 May 2025 14:48:15 +0300 Subject: [PATCH 2/4] do not help the compiler emit classic bit manipulation --- stl/src/vector_algorithms.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 20c3f717074..1a2ca00b514 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -5716,7 +5716,7 @@ namespace { return true; } - _bittestandreset(&_Unfit_match, _Match_last_pos); + _Unfit_match ^= 1 << _Match_last_pos; } return false; @@ -5805,7 +5805,7 @@ namespace { } } - _bittestandreset(&_Match, _Match_last_pos); + _Match ^= 1 << _Match_last_pos; } return false; From 9a0d2813998416706faa535c3ab625a390c6c818 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 22 May 2025 14:50:32 +0300 Subject: [PATCH 3/4] unsigned long lambda arg --- stl/src/vector_algorithms.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 1a2ca00b514..370bbaef465 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -5310,7 +5310,7 @@ namespace { #pragma warning(push) #pragma warning(disable : 4324) // structure was padded due to alignment specifier - const auto _Check_first = [=, &_Mid1](long _Match) noexcept { + const auto _Check_first = [=, &_Mid1](unsigned long _Match) noexcept { while (_Match != 0) { const unsigned int _Pos = _Traits::_Bsr(_Match); @@ -5331,7 +5331,7 @@ namespace { return false; }; - const auto _Check = [=, &_Mid1](long _Match) noexcept { + const auto _Check = [=, &_Mid1](unsigned long _Match) noexcept { while (_Match != 0) { const unsigned int _Pos = _Traits::_Bsr(_Match); @@ -5397,7 +5397,7 @@ namespace { #pragma warning(push) #pragma warning(disable : 4324) // structure was padded due to alignment specifier - const auto _Check = [=, &_Mid1](long _Match) noexcept { + const auto _Check = [=, &_Mid1](unsigned long _Match) noexcept { while (_Match != 0) { const unsigned int _Pos = _Traits::_Bsr(_Match); @@ -5773,7 +5773,7 @@ namespace { #pragma warning(push) #pragma warning(disable : 4324) // structure was padded due to alignment specifier - const auto _Check = [=, &_Mid1](long _Match) noexcept { + const auto _Check = [=, &_Mid1](unsigned long _Match) noexcept { while (_Match != 0) { const void* _Tmp1 = _Mid1; unsigned long _Match_last_pos; From 546aedbceea7b7d3c2e2b81046a4cf891e4ad1c4 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 22 May 2025 14:59:53 +0300 Subject: [PATCH 4/4] all bitmasks unsigned --- stl/src/vector_algorithms.cpp | 62 +++++++++++++++++------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 370bbaef465..6d2fb5bc302 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -2884,7 +2884,7 @@ namespace { do { const __m256i _Data = _mm256_loadu_si256(static_cast(_First)); - int _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand)); + unsigned int _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand)); if constexpr (_Pred == _Predicate::_Not_equal) { _Bingo ^= 0xFFFF'FFFF; @@ -2903,7 +2903,7 @@ namespace { const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size); const __m256i _Data = _mm256_maskload_epi32(static_cast(_First), _Tail_mask); const __m256i _Cmp = _Traits::_Cmp_avx(_Data, _Comparand); - int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask)); + unsigned int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask)); if constexpr (_Pred == _Predicate::_Not_equal) { _Bingo ^= (1 << _Avx_tail_size) - 1; @@ -2928,7 +2928,7 @@ namespace { do { const __m128i _Data = _mm_loadu_si128(static_cast(_First)); - int _Bingo = _mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand)); + unsigned int _Bingo = _mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand)); if constexpr (_Pred == _Predicate::_Not_equal) { _Bingo ^= 0xFFFF; @@ -2975,7 +2975,7 @@ namespace { do { _Rewind_bytes(_Last, 32); const __m256i _Data = _mm256_loadu_si256(static_cast(_Last)); - int _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand)); + unsigned int _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand)); if constexpr (_Pred == _Predicate::_Not_equal) { _Bingo ^= 0xFFFF'FFFF; @@ -2993,7 +2993,7 @@ namespace { const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size); const __m256i _Data = _mm256_maskload_epi32(static_cast(_Last), _Tail_mask); const __m256i _Cmp = _Traits::_Cmp_avx(_Data, _Comparand); - int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask)); + unsigned int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask)); if constexpr (_Pred == _Predicate::_Not_equal) { _Bingo ^= (1 << _Avx_tail_size) - 1; @@ -3017,7 +3017,7 @@ namespace { do { _Rewind_bytes(_Last, 16); const __m128i _Data = _mm_loadu_si128(static_cast(_Last)); - int _Bingo = _mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand)); + unsigned int _Bingo = _mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand)); if constexpr (_Pred == _Predicate::_Not_equal) { _Bingo ^= 0xFFFF; @@ -3081,9 +3081,9 @@ namespace { const void* _Next = _First; _Advance_bytes(_Next, sizeof(_Ty)); - const __m256i _Data = _mm256_loadu_si256(static_cast(_First)); - const __m256i _Comparand = _mm256_loadu_si256(static_cast(_Next)); - const int _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand)); + const __m256i _Data = _mm256_loadu_si256(static_cast(_First)); + const __m256i _Comparand = _mm256_loadu_si256(static_cast(_Next)); + const unsigned int _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand)); if (_Bingo != 0) { const unsigned long _Offset = _tzcnt_u32(_Bingo); @@ -3098,11 +3098,11 @@ namespace { const void* _Next = _First; _Advance_bytes(_Next, sizeof(_Ty)); - const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size); - const __m256i _Data = _mm256_maskload_epi32(static_cast(_First), _Tail_mask); - const __m256i _Comparand = _mm256_maskload_epi32(static_cast(_Next), _Tail_mask); - const __m256i _Cmp = _Traits::_Cmp_avx(_Data, _Comparand); - const int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask)); + const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size); + const __m256i _Data = _mm256_maskload_epi32(static_cast(_First), _Tail_mask); + const __m256i _Comparand = _mm256_maskload_epi32(static_cast(_Next), _Tail_mask); + const __m256i _Cmp = _Traits::_Cmp_avx(_Data, _Comparand); + const unsigned int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask)); if (_Bingo != 0) { const unsigned long _Offset = _tzcnt_u32(_Bingo); @@ -3124,9 +3124,9 @@ namespace { const void* _Next = _First; _Advance_bytes(_Next, sizeof(_Ty)); - const __m128i _Data = _mm_loadu_si128(static_cast(_First)); - const __m128i _Comparand = _mm_loadu_si128(static_cast(_Next)); - const int _Bingo = _mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand)); + const __m128i _Data = _mm_loadu_si128(static_cast(_First)); + const __m128i _Comparand = _mm_loadu_si128(static_cast(_Next)); + const unsigned int _Bingo = _mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand)); if (_Bingo != 0) { unsigned long _Offset; @@ -3183,8 +3183,8 @@ namespace { do { const __m256i _Data = _mm256_loadu_si256(reinterpret_cast(_First)); - const __m256i _Cmp = _Traits::_Cmp_avx(_Comparand, _Data); - const auto _Mask = static_cast(_mm256_movemask_epi8(_Cmp)); + const __m256i _Cmp = _Traits::_Cmp_avx(_Comparand, _Data); + const uint32_t _Mask = _mm256_movemask_epi8(_Cmp); uint64_t _MskX = uint64_t{_Carry} | (uint64_t{_Mask} << 32); @@ -3585,11 +3585,11 @@ namespace { } if (const size_t _Avx_tail_size = _Size_bytes & 0x1C; _Avx_tail_size != 0) { - const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size); - const __m256i _Data = _mm256_maskload_epi32(static_cast(_First), _Tail_mask); - const __m256i _Mask = _mm256_and_si256(_Traits::_Cmp_avx(_Data, _Comparand), _Tail_mask); - const int _Bingo = _mm256_movemask_epi8(_Mask); - const size_t _Tail_count = __popcnt(_Bingo); // Assume available with SSE4.2 + const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size); + const __m256i _Data = _mm256_maskload_epi32(static_cast(_First), _Tail_mask); + const __m256i _Mask = _mm256_and_si256(_Traits::_Cmp_avx(_Data, _Comparand), _Tail_mask); + const unsigned int _Bingo = _mm256_movemask_epi8(_Mask); + const size_t _Tail_count = __popcnt(_Bingo); // Assume available with SSE4.2 _Result += _Tail_count / sizeof(_Ty); _Advance_bytes(_First, _Avx_tail_size); } @@ -4292,8 +4292,8 @@ namespace { _Found = _mm_and_si128(_Found, _Found_part); } - const int _Bingo = _mm_cvtsi128_si32(_Found); - int _Found_pos = _Found_pos_init; + const unsigned int _Bingo = _mm_cvtsi128_si32(_Found); + int _Found_pos = _Found_pos_init; if (_Bingo != 0) { unsigned long _Tmp; @@ -4478,7 +4478,7 @@ namespace { } } - if (const int _Bingo = _mm256_movemask_epi8(_Eq); _Bingo != 0) { + if (const uint32_t _Bingo = _mm256_movemask_epi8(_Eq); _Bingo != 0) { const unsigned long _Offset = _tzcnt_u32(_Bingo); _Advance_bytes(_First1, _Offset); return _First1; @@ -4497,7 +4497,7 @@ namespace { } } - if (const int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Eq, _Tail_mask)); _Bingo != 0) { + if (const uint32_t _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Eq, _Tail_mask)); _Bingo != 0) { const unsigned long _Offset = _tzcnt_u32(_Bingo); _Advance_bytes(_First1, _Offset); return _First1; @@ -4832,8 +4832,8 @@ namespace { _Advance_bytes(_Cur_needle, 16); } - const int _Bingo = _mm_cvtsi128_si32(_Found); - int _Found_pos = _Not_found; + const unsigned int _Bingo = _mm_cvtsi128_si32(_Found); + int _Found_pos = _Not_found; if (_Bingo != 0) { unsigned long _Tmp; @@ -5696,7 +5696,7 @@ namespace { #pragma warning(push) #pragma warning(disable : 4324) // structure was padded due to alignment specifier const auto _Check_unfit = [=, &_Mid1](const unsigned int _Match) noexcept { - long _Unfit_match = _Match & _Needle_unfit_mask; + unsigned long _Unfit_match = _Match & _Needle_unfit_mask; while (_Unfit_match != 0) { const void* _Tmp1 = _Mid1; unsigned long _Match_last_pos;