From a2071184cfae03854939c8d72d865f492b6a2b5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 11 Jun 2024 22:37:49 +0200 Subject: [PATCH] Regexes: Sanitize user input (#1624) * Sanitize regex input * Comments inside sanitize_regex() * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Escape \, too --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Axel Huebl --- src/Series.cpp | 29 ++++++++++++++++++++++------- test/SerialIOTest.cpp | 8 ++++++++ 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/Series.cpp b/src/Series.cpp index fcd382bc96..09921566bf 100644 --- a/src/Series.cpp +++ b/src/Series.cpp @@ -3040,12 +3040,12 @@ namespace std::smatch regexMatches; bool match = std::regex_match(filename, regexMatches, pattern); int processedPadding = - padding != 0 ? padding : (match ? regexMatches[1].length() : 0); + padding != 0 ? padding : (match ? regexMatches[2].length() : 0); return { match, processedPadding, padding < 0 ? padding - : match ? std::stoull(regexMatches[1]) + : match ? std::stoull(regexMatches[2]) : 0, index_of_extension.has_value() ? std::make_optional( @@ -3054,17 +3054,31 @@ namespace }; } + namespace + { + auto sanitize_regex(std::string const &input) -> std::string + { + // need to escape special characters reserved for regexes, see + // https://stackoverflow.com/questions/40195412/c11-regex-search-for-exact-string-escape + // https://regex101.com/r/GDPK7E/3 + std::regex specialChars{R"([-[\]{}()*+?.,\^$|#\s\\])"}; + // `$&` is the matched substring, see + // https://en.cppreference.com/w/cpp/regex/regex_replace + return std::regex_replace(input, specialChars, R"(\$&)"); + } + } // namespace + std::function matcher( std::string const &prefix, int padding, std::string const &postfix, std::optional const &filenameSuffix) { - std::string nameReg = "^" + prefix; + std::string nameReg = "^(" + sanitize_regex(prefix) + ")"; size_t index_of_extension = 0; if (padding < 0) { - index_of_extension = 1; + index_of_extension = 3; } else if (padding > 0) { @@ -3076,16 +3090,17 @@ namespace // iteration number via std::stoull(regexMatches[1]) nameReg += "(([1-9][[:digit:]]*)?([[:digit:]]"; nameReg += "{" + std::to_string(padding) + "}))"; - index_of_extension = 4; + index_of_extension = 6; } else { // No padding specified, any number of digits is ok. nameReg += "([[:digit:]]"; nameReg += "+)"; - index_of_extension = 2; + index_of_extension = 4; } - nameReg += postfix + filenameSuffix.value_or("(\\.[[:alnum:]]+)") + "$"; + nameReg += "(" + sanitize_regex(postfix) + ")" + + filenameSuffix.value_or("(\\.[[:alnum:]]+)") + "$"; return buildMatcher( nameReg, padding, diff --git a/test/SerialIOTest.cpp b/test/SerialIOTest.cpp index 524b2af6a3..ae2a88fc98 100644 --- a/test/SerialIOTest.cpp +++ b/test/SerialIOTest.cpp @@ -5095,8 +5095,16 @@ TEST_CASE("serial_iterator", "[serial][adios2]") { for (auto const &t : testedFileExtensions()) { +#ifdef _WIN32 serial_iterator("../samples/serial_iterator_filebased_%T." + t); serial_iterator("../samples/serial_iterator_groupbased." + t); +#else + // Add some regex characters into the file names to see that we can deal + // with that. Don't do that on Windows because Windows does not like + // those characters within file paths. + serial_iterator("../samples/serial_iterator_filebased_+?_%T." + t); + serial_iterator("../samples/serial_iterator_groupbased_+?." + t); +#endif } }