Skip to content

Commit

Permalink
Regexes: Sanitize user input (#1624)
Browse files Browse the repository at this point in the history
* Sanitize regex input

* Comments inside sanitize_regex()

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Escape \, too

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Axel Huebl <axel.huebl@plasma.ninja>
  • Loading branch information
3 people authored Jun 11, 2024
1 parent 534706f commit a207118
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 7 deletions.
29 changes: 22 additions & 7 deletions src/Series.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3040,12 +3040,12 @@ namespace
std::smatch regexMatches;
bool match = std::regex_match(filename, regexMatches, pattern);
int processedPadding =
padding != 0 ? padding : (match ? regexMatches[1].length() : 0);
padding != 0 ? padding : (match ? regexMatches[2].length() : 0);
return {
match,
processedPadding,
padding < 0 ? padding
: match ? std::stoull(regexMatches[1])
: match ? std::stoull(regexMatches[2])
: 0,
index_of_extension.has_value()
? std::make_optional<std::string>(
Expand All @@ -3054,17 +3054,31 @@ namespace
};
}

namespace
{
    /**
     * Make user-provided text safe to splice into a larger regular
     * expression by backslash-escaping every character matched by the
     * character class below (regex metacharacters plus whitespace).
     *
     * @param input Raw text that should be matched literally.
     * @return Copy of `input` with a backslash inserted in front of every
     *         matched character; all other characters are left untouched.
     */
    auto sanitize_regex(std::string const &input) -> std::string
    {
        // Characters reserved by regex syntax that must be escaped, see
        // https://stackoverflow.com/questions/40195412/c11-regex-search-for-exact-string-escape
        // https://regex101.com/r/GDPK7E/3
        std::regex const reservedCharacters{R"([-[\]{}()*+?.,\^$|#\s\\])"};

        // In the replacement format, `$&` expands to the matched substring, so
        // every hit is re-emitted with a backslash in front of it, see
        // https://en.cppreference.com/w/cpp/regex/regex_replace
        std::string const backslashThenMatch{R"(\$&)"};

        std::string escaped =
            std::regex_replace(input, reservedCharacters, backslashThenMatch);
        return escaped;
    }
} // namespace

std::function<Match(std::string const &)> matcher(
std::string const &prefix,
int padding,
std::string const &postfix,
std::optional<std::string> const &filenameSuffix)
{
std::string nameReg = "^" + prefix;
std::string nameReg = "^(" + sanitize_regex(prefix) + ")";
size_t index_of_extension = 0;
if (padding < 0)
{
index_of_extension = 1;
index_of_extension = 3;
}
else if (padding > 0)
{
Expand All @@ -3076,16 +3090,17 @@ namespace
// iteration number via std::stoull(regexMatches[2])
nameReg += "(([1-9][[:digit:]]*)?([[:digit:]]";
nameReg += "{" + std::to_string(padding) + "}))";
index_of_extension = 4;
index_of_extension = 6;
}
else
{
// No padding specified, any number of digits is ok.
nameReg += "([[:digit:]]";
nameReg += "+)";
index_of_extension = 2;
index_of_extension = 4;
}
nameReg += postfix + filenameSuffix.value_or("(\\.[[:alnum:]]+)") + "$";
nameReg += "(" + sanitize_regex(postfix) + ")" +
filenameSuffix.value_or("(\\.[[:alnum:]]+)") + "$";
return buildMatcher(
nameReg,
padding,
Expand Down
8 changes: 8 additions & 0 deletions test/SerialIOTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5095,8 +5095,16 @@ TEST_CASE("serial_iterator", "[serial][adios2]")
{
for (auto const &t : testedFileExtensions())
{
#ifdef _WIN32
serial_iterator("../samples/serial_iterator_filebased_%T." + t);
serial_iterator("../samples/serial_iterator_groupbased." + t);
#else
// Add some regex characters into the file names to see that we can deal
// with that. Don't do that on Windows because Windows does not like
// those characters within file paths.
serial_iterator("../samples/serial_iterator_filebased_+?_%T." + t);
serial_iterator("../samples/serial_iterator_groupbased_+?." + t);
#endif
}
}

Expand Down

0 comments on commit a207118

Please sign in to comment.