diff --git a/cmake/install/Config.cmake.in b/cmake/install/Config.cmake.in index 32b8a28a8768..e14b18eeb56e 100644 --- a/cmake/install/Config.cmake.in +++ b/cmake/install/Config.cmake.in @@ -42,7 +42,6 @@ set(USERVER_TESTSUITE_DIR "${USERVER_CMAKE_DIR}/testsuite") set(USERVER_CONAN @USERVER_CONAN@) set(USERVER_IMPL_ORIGINAL_CXX_STANDARD @CMAKE_CXX_STANDARD@) set(USERVER_IMPL_FEATURE_JEMALLOC @USERVER_FEATURE_JEMALLOC@) -set(USERVER_IMPL_FEATURE_RE2 @USERVER_FEATURE_RE2@) set(USERVER_USE_STATIC_LIBS @USERVER_USE_STATIC_LIBS@) set_property(GLOBAL PROPERTY userver_cmake_dir "${USERVER_CMAKE_DIR}") diff --git a/cmake/install/userver-universal-config.cmake b/cmake/install/userver-universal-config.cmake index c706b0b0a730..970ad7e01293 100644 --- a/cmake/install/userver-universal-config.cmake +++ b/cmake/install/userver-universal-config.cmake @@ -43,24 +43,18 @@ if (USERVER_IMPL_FEATURE_JEMALLOC AND endif() endif() -if(USERVER_IMPL_FEATURE_RE2) - if(USERVER_CONAN) - find_package(re2 REQUIRED CONFIG) - else() - find_package(re2 REQUIRED) - endif() -endif() - if (USERVER_CONAN) find_package(cryptopp REQUIRED CONFIG) find_package(yaml-cpp REQUIRED CONFIG) find_package(zstd REQUIRED CONFIG) + find_package(re2 REQUIRED CONFIG) find_package(RapidJSON REQUIRED CONFIG) else() find_package(CryptoPP REQUIRED) find_package(libyamlcpp REQUIRED) find_package(libzstd REQUIRED) + find_package(re2 REQUIRED) endif() include("${USERVER_CMAKE_DIR}/AddGoogleTests.cmake") diff --git a/conanfile.py b/conanfile.py index 6084da5c3514..8593f8bc7884 100644 --- a/conanfile.py +++ b/conanfile.py @@ -30,7 +30,6 @@ class UserverConan(ConanFile): 'fPIC': [True, False], 'lto': [True, False], 'with_jemalloc': [True, False], - 'with_re2': [True, False], 'with_mongodb': [True, False], 'with_postgresql': [True, False], 'with_postgresql_extra': [True, False], @@ -54,7 +53,6 @@ class UserverConan(ConanFile): 'fPIC': True, 'lto': False, 'with_jemalloc': (platform.system() != 'Darwin'), - 'with_re2': True, 
'with_mongodb': True, 'with_postgresql': True, 'with_postgresql_extra': False, @@ -118,12 +116,11 @@ def requirements(self): self.requires('yaml-cpp/0.8.0') self.requires('zlib/1.3.1') self.requires('zstd/1.5.5') + self.requires('icu/74.1', force=True) + self.requires('re2/20230301') if self.options.with_jemalloc: self.requires('jemalloc/5.3.0') - if self.options.with_re2: - self.requires('icu/74.1', force=True) - self.requires('re2/20230301') if self.options.with_grpc or self.options.with_clickhouse: self.requires('abseil/20240116.2', force=True) if self.options.with_grpc: @@ -202,7 +199,6 @@ def generate(self): tool_ch.variables['USERVER_LTO'] = self.options.lto tool_ch.variables['USERVER_FEATURE_JEMALLOC'] = self.options.with_jemalloc - tool_ch.variables['USERVER_FEATURE_RE2'] = self.options.with_re2 tool_ch.variables['USERVER_FEATURE_MONGODB'] = self.options.with_mongodb tool_ch.variables['USERVER_FEATURE_POSTGRESQL'] = self.options.with_postgresql tool_ch.variables['USERVER_FEATURE_PATCH_LIBPQ'] = self.options.with_postgresql_extra diff --git a/universal/CMakeLists.txt b/universal/CMakeLists.txt index fbc5162b541e..26abc159db40 100644 --- a/universal/CMakeLists.txt +++ b/universal/CMakeLists.txt @@ -190,17 +190,12 @@ if(NOT USERVER_CONAN) endif() endif() -option(USERVER_FEATURE_RE2 "Build userver with re2 support" ON) -if(USERVER_FEATURE_RE2) - if(USERVER_CONAN) - find_package(re2 REQUIRED CONFIG) - else() - include(SetupRe2) - endif() - target_link_libraries(${PROJECT_NAME} PRIVATE re2::re2) +if(USERVER_CONAN) + find_package(re2 REQUIRED CONFIG) else() - target_compile_definitions(${PROJECT_NAME} PUBLIC USERVER_NO_RE2_SUPPORT=1) + include(SetupRe2) endif() +target_link_libraries(${PROJECT_NAME} PRIVATE re2::re2) target_compile_definitions(${PROJECT_NAME} PRIVATE CRYPTOPP_ENABLE_NAMESPACE_WEAK=1 diff --git a/universal/include/userver/utils/regex.hpp b/universal/include/userver/utils/regex.hpp index a06cbb142444..1b03c8789acb 100644 --- 
a/universal/include/userver/utils/regex.hpp +++ b/universal/include/userver/utils/regex.hpp @@ -24,11 +24,19 @@ class RegexError : public std::exception {}; /// @brief A drop-in replacement for `std::regex` without huge includes /// and with better performance characteristics. /// -/// Is currently implemented using either Boost.Regex or re2, depending on `USERVER_FEATURE_RE2` flag. +/// utils::regex is currently implemented using re2. /// /// @see @ref utils::regex_match /// @see @ref utils::regex_search /// @see @ref utils::regex_replace +/// +/// Read the [re2 documentation](https://github.com/google/re2/wiki/syntax) on the limitations of the re2 engine. +/// Notably, it does not support: +/// +/// 1. lookahead and lookbehind; +/// 2. repetition counts over 1000 (and regexes with large repetition counts consume more memory); +/// 3. spaces in quantifiers like `\w{1, 5}`; +/// 4. possessive quantifiers. class regex final { public: /// Constructs a null regex, any usage except for copy/move is UB. @@ -158,14 +166,10 @@ struct Re2Replacement final { std::string_view replacement; }; -#if !defined(USERVER_NO_RE2_SUPPORT) || defined(DOXYGEN) - /// @overload /// @see utils::Re2Replacement std::string regex_replace(std::string_view str, const regex& pattern, Re2Replacement repl); -#endif - /// @cond bool IsImplicitBoostRegexFallbackAllowed() noexcept; void SetImplicitBoostRegexFallbackAllowed(bool) noexcept; diff --git a/universal/src/utils/regex.cpp b/universal/src/utils/regex.cpp index eeb60744ad07..ab20630eb955 100644 --- a/universal/src/utils/regex.cpp +++ b/universal/src/utils/regex.cpp @@ -8,13 +8,10 @@ #include #include +#include #include #include -#ifndef USERVER_NO_RE2_SUPPORT -#include -#endif - #include #include #include @@ -33,8 +30,6 @@ class RegexErrorImpl : public RegexError { std::string message_; }; -#ifndef USERVER_NO_RE2_SUPPORT - namespace { constexpr std::size_t kGroupsSboSize = 5; @@ -312,100 +307,6 @@ bool IsImplicitBoostRegexFallbackAllowed() noexcept { return 
implicit_boost_rege void SetImplicitBoostRegexFallbackAllowed(bool allowed) noexcept { implicit_boost_regex_fallback_allowed = allowed; } -#else - -struct regex::Impl { - boost::regex r; - - Impl() = default; - - explicit Impl(std::string_view pattern) try : r(pattern.begin(), pattern.end()) { - } catch (const boost::regex_error& ex) { - throw RegexErrorImpl(ex.what()); - } -}; - -regex::regex() = default; - -regex::regex(std::string_view pattern) : impl_(regex::Impl(pattern)) {} - -regex::~regex() = default; - -regex::regex(const regex&) = default; - -regex::regex(regex&& r) noexcept { impl_->r.swap(r.impl_->r); } - -regex& regex::operator=(const regex&) = default; - -regex& regex::operator=(regex&& r) noexcept { - impl_->r.swap(r.impl_->r); - return *this; -} - -bool regex::operator==(const regex& other) const { return impl_->r == other.impl_->r; } - -std::string_view regex::GetPatternView() const { return std::string_view{impl_->r.expression(), impl_->r.size()}; } - -std::string regex::str() const { return std::string{GetPatternView()}; } - -//////////////////////////////////////////////////////////////// - -struct match_results::Impl { - boost::cmatch m; - - Impl() = default; -}; - -match_results::match_results() = default; - -match_results::~match_results() = default; - -match_results::match_results(const match_results&) = default; - -match_results& match_results::operator=(const match_results&) = default; - -std::size_t match_results::size() const { return impl_->m.size(); } - -std::string_view match_results::operator[](std::size_t sub) const { - auto substr = impl_->m[sub]; - return {&*substr.begin(), static_cast(substr.length())}; -} - -//////////////////////////////////////////////////////////////// - -bool regex_match(std::string_view str, const regex& pattern) { - return boost::regex_match(str.begin(), str.end(), pattern.impl_->r); -} - -bool regex_match(std::string_view str, match_results& m, const regex& pattern) { - return 
boost::regex_match(str.begin(), str.end(), m.impl_->m, pattern.impl_->r); -} - -bool regex_search(std::string_view str, const regex& pattern) { - return boost::regex_search(str.begin(), str.end(), pattern.impl_->r); -} - -bool regex_search(std::string_view str, match_results& m, const regex& pattern) { - return boost::regex_search(str.begin(), str.end(), m.impl_->m, pattern.impl_->r); -} - -std::string regex_replace(std::string_view str, const regex& pattern, std::string_view repl) { - std::string res; - res.reserve(str.size() + str.size() / 4); - - boost::regex_replace( - std::back_inserter(res), str.begin(), str.end(), pattern.impl_->r, repl, boost::regex_constants::format_literal - ); - - return res; -} - -bool IsImplicitBoostRegexFallbackAllowed() noexcept { return true; } - -void SetImplicitBoostRegexFallbackAllowed(bool /*allowed*/) noexcept {} - -#endif - } // namespace utils USERVER_NAMESPACE_END diff --git a/universal/src/utils/regex_test.cpp b/universal/src/utils/regex_test.cpp index aabe42b81699..838ec4339874 100644 --- a/universal/src/utils/regex_test.cpp +++ b/universal/src/utils/regex_test.cpp @@ -14,12 +14,6 @@ auto AllowBoostRegexScope(bool allow) { return utils::FastScopeGuard([old]() noexcept { utils::SetImplicitBoostRegexFallbackAllowed(old); }); } -#ifndef USERVER_NO_RE2_SUPPORT -constexpr bool kHasRe2Support = true; -#else -constexpr bool kHasRe2Support = false; -#endif - } // namespace TEST(Regex, Ctors) { @@ -36,10 +30,6 @@ TEST(Regex, Ctors) { TEST(Regex, InvalidRegex) { UEXPECT_THROW(utils::regex("regex***"), utils::RegexError); } TEST(Regex, NegativeLookaheadDisallowed) { - if constexpr (!kHasRe2Support) { - GTEST_SKIP() << "No re2 support"; - } - const auto allowed = AllowBoostRegexScope(false); UEXPECT_THROW_MSG( utils::regex{"(?!bad)([a-z]+)(\\d*)"}, @@ -91,10 +81,6 @@ TEST(Regex, MatchNegativeLookahead) { } TEST(Regex, MatchNewlines) { - if constexpr (!kHasRe2Support) { - GTEST_SKIP() << "No re2 support"; - } - // $ matches the end of 
the whole string as a safe default. const utils::regex r1(R"(^(1\n2\n3)(\n)?$)"); EXPECT_TRUE(utils::regex_search("1\n2\n3\n", r1)); @@ -177,10 +163,6 @@ TEST(Regex, SearchEmptyCaptureGroupsGoldenTest) { } TEST(Regex, SearchNonPresentCaptureGroupsGoldenTest) { - if constexpr (!kHasRe2Support) { - GTEST_SKIP() << "No re2 support"; - } - // 2nd capture group cannot be present in `r` in any way (otherwise nested <> would have to be present), // so utils::regex must return an invalid std::string_view for the 2nd group. // The current implementation returns `nullptr` std::string_view, but the exact value of `.data()` @@ -218,8 +200,6 @@ TEST(Regex, ReplaceEmpty) { EXPECT_EQ(utils::regex_replace("ab123cd", r, "*"), "*a*b**c*d*"); } -#ifndef USERVER_NO_RE2_SUPPORT - TEST(Regex, ReplaceRe2) { const utils::regex r("[a-z]{2}"); EXPECT_EQ(utils::regex_replace("ab0ef1", r, utils::Re2Replacement{"{\\0}"}), "{ab}0{ef}1"); @@ -228,6 +208,4 @@ TEST(Regex, ReplaceRe2) { EXPECT_EQ(utils::regex_replace("ab0ef1", group_regex, utils::Re2Replacement{"(\\2-\\1)"}), "(0-ab)(1-ef)"); } -#endif - USERVER_NAMESPACE_END diff --git a/universal/utest/include/userver/utest/assert_macros.hpp b/universal/utest/include/userver/utest/assert_macros.hpp index c30862cf56f5..18c883cca370 100644 --- a/universal/utest/include/userver/utest/assert_macros.hpp +++ b/universal/utest/include/userver/utest/assert_macros.hpp @@ -44,6 +44,8 @@ std::string AssertThrow( std::string AssertNoThrow(std::function statement, std::string_view statement_text); +std::string QuoteStringForRegex(std::string_view message); + } // namespace utest::impl USERVER_NAMESPACE_END @@ -129,7 +131,8 @@ USERVER_NAMESPACE_END UEXPECT_THROW_MSG(statement, USERVER_NAMESPACE::utils::InvariantError, message_substring) #else // NOLINTNEXTLINE (cppcoreguidelines-macro-usage) -#define EXPECT_UINVARIANT_FAILURE_MSG(statement, message_substring) UEXPECT_DEATH(statement, message_substring) +#define EXPECT_UINVARIANT_FAILURE_MSG(statement, 
message_substring) \ + UEXPECT_DEATH(statement, USERVER_NAMESPACE::utest::impl::QuoteStringForRegex(message_substring)) #endif /// @endcond diff --git a/universal/utest/src/utest/assert_macros.cpp b/universal/utest/src/utest/assert_macros.cpp index 45cead6ebc0b..eef272ef0bf1 100644 --- a/universal/utest/src/utest/assert_macros.cpp +++ b/universal/utest/src/utest/assert_macros.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include @@ -137,6 +138,10 @@ std::string AssertNoThrow(std::function statement, std::string_view stat } } +std::string QuoteStringForRegex(std::string_view message) { + return re2::RE2::QuoteMeta(re2::StringPiece{message.data(), message.size()}); +} + } // namespace utest::impl USERVER_NAMESPACE_END