diff --git a/benchmarks/src/regex_search.cpp b/benchmarks/src/regex_search.cpp
index bc6a3d12537..19018fc80f2 100644
--- a/benchmarks/src/regex_search.cpp
+++ b/benchmarks/src/regex_search.cpp
@@ -8,13 +8,14 @@
 #include "lorem.hpp"
 
 using namespace std;
+using namespace regex_constants;
 
-void bm_lorem_search(benchmark::State& state, const char* pattern) {
+void bm_lorem_search(benchmark::State& state, const char* pattern, syntax_option_type syntax = ECMAScript) {
     string repeated_lorem{lorem_ipsum};
     for (long long i = 0; i < state.range(); ++i) {
         repeated_lorem += repeated_lorem;
     }
-    regex re{pattern};
+    regex re{pattern, syntax};
 
     for (auto _ : state) {
         benchmark::DoNotOptimize(repeated_lorem);
@@ -33,6 +34,7 @@ void bm_lorem_search(benchmark::State& state, const char* pattern) {
 
 BENCHMARK_CAPTURE(bm_lorem_search, "^bibe", "^bibe")->Arg(2)->Arg(3)->Arg(4);
 BENCHMARK_CAPTURE(bm_lorem_search, "bibe", "bibe")->Arg(2)->Arg(3)->Arg(4);
+BENCHMARK_CAPTURE(bm_lorem_search, "bibe".collate, "bibe", regex_constants::collate)->Arg(2)->Arg(3)->Arg(4);
 BENCHMARK_CAPTURE(bm_lorem_search, "(bibe)", "(bibe)")->Arg(2)->Arg(3)->Arg(4);
 BENCHMARK_CAPTURE(bm_lorem_search, "(bibe)+", "(bibe)+")->Arg(2)->Arg(3)->Arg(4);
 BENCHMARK_CAPTURE(bm_lorem_search, "(?:bibe)+", "(?:bibe)+")->Arg(2)->Arg(3)->Arg(4);
diff --git a/stl/inc/regex b/stl/inc/regex
index 3753efd523d..7aafd651bb5 100644
--- a/stl/inc/regex
+++ b/stl/inc/regex
@@ -3693,6 +3693,9 @@ _BidIt1 _Compare_translate_both(_BidIt1 _Begin1, _BidIt1 _End1, _BidIt2 _Begin2,
     // compare character ranges, translating characters in both ranges according to syntax options
     if (_Sflags & regex_constants::icase) {
         return _STD _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, _Cmp_icase<_RxTraits>{_Traits});
+    } else if constexpr (_Is_any_of_v<_RxTraits, regex_traits<char>, regex_traits<wchar_t>>) {
+        // Default traits: translate() is the identity, so plain equality should match what the collate path computes.
+        return _STD _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, equal_to<typename _RxTraits::char_type>{});
     } else if (_Sflags & regex_constants::collate) {
         return _STD _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, _Cmp_collate<_RxTraits>{_Traits});
     } else {
@@ -3706,6 +3709,9 @@ _BidIt1 _Compare_translate_left(_BidIt1 _Begin1, _BidIt1 _End1, _BidIt2 _Begin2,
     // compare character ranges, translating characters in the left range according to syntax options
     if (_Sflags & regex_constants::icase) {
         return _STD _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, _Cmp_icase_translateleft<_RxTraits>{_Traits});
+    } else if constexpr (_Is_any_of_v<_RxTraits, regex_traits<char>, regex_traits<wchar_t>>) {
+        // Default traits: translate() is the identity, so plain equality should match what the collate path computes.
+        return _STD _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, equal_to<typename _RxTraits::char_type>{});
     } else if (_Sflags & regex_constants::collate) {
         return _STD _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, _Cmp_collate_translateleft<_RxTraits>{_Traits});
     } else {
@@ -3720,6 +3726,9 @@ _BidIt1 _Search_translate_left(_BidIt1 _Begin1, _BidIt1 _End1, _BidIt2 _Begin2,
     // after translating characters in the left sequence according to syntax options
     if (_Sflags & regex_constants::icase) {
         return _STD search(_Begin1, _End1, _Begin2, _End2, _Cmp_icase_translateleft<_RxTraits>{_Traits});
+    } else if constexpr (_Is_any_of_v<_RxTraits, regex_traits<char>, regex_traits<wchar_t>>) {
+        // Default traits: translate() is the identity, so plain equality should match what the collate path computes.
+        return _STD search(_Begin1, _End1, _Begin2, _End2, equal_to<typename _RxTraits::char_type>{});
     } else if (_Sflags & regex_constants::collate) {
         return _STD search(_Begin1, _End1, _Begin2, _End2, _Cmp_collate_translateleft<_RxTraits>{_Traits});
     } else {
diff --git a/tests/std/tests/VSO_0000000_regex_use/test.cpp b/tests/std/tests/VSO_0000000_regex_use/test.cpp
index a1e41b84421..ea6e246adb6 100644
--- a/tests/std/tests/VSO_0000000_regex_use/test.cpp
+++ b/tests/std/tests/VSO_0000000_regex_use/test.cpp
@@ -2103,6 +2103,32 @@ void test_gh_5576() {
         match_default, "AbGweEXfFlXlLLlffflEXlF");
 }
 
+void test_gh_5672() {
+    // GH-5672: Speed up skip optimization for default `regex_traits` in `collate` mode
+    // The PR added a faster branch in the skip optimization when matching in collate mode
+    // for default `regex_traits<char>` and `regex_traits<wchar_t>`.
+    // The following tests check that searching still works correctly when the faster branch is engaged.
+    {
+        test_regex collating_re(&g_regexTester, "g", regex_constants::collate);
+
+        collating_re.should_search_match("abcdefghijklmnopqrstuvwxyz", "g");
+        collating_re.should_search_fail("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+        collating_re.should_search_match("zyxwvutsrqponmlkjihgfedcba", "g");
+        collating_re.should_search_fail("ZYXWVUTSRQPONMLKJIHGFEDCBA");
+        collating_re.should_search_fail("zyxwvutsrqponmlkjihedcba");
+    }
+
+    {
+        test_wregex collating_re(&g_regexTester, L"g", regex_constants::collate);
+
+        collating_re.should_search_match(L"abcdefghijklmnopqrstuvwxyz", L"g");
+        collating_re.should_search_fail(L"ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+        collating_re.should_search_match(L"zyxwvutsrqponmlkjihgfedcba", L"g");
+        collating_re.should_search_fail(L"ZYXWVUTSRQPONMLKJIHGFEDCBA");
+        collating_re.should_search_fail(L"zyxwvutsrqponmlkjihedcba");
+    }
+}
+
 int main() {
     test_dev10_449367_case_insensitivity_should_work();
     test_dev11_462743_regex_collate_should_not_disable_regex_icase();
@@ -2153,6 +2179,7 @@ int main() {
     test_gh_5490();
     test_gh_5509();
     test_gh_5576();
+    test_gh_5672();
 
     return g_regexTester.result();
 }