From de99fb5725d0b228a8ffcefa9d080458d06daaa3 Mon Sep 17 00:00:00 2001 From: Andreas Drewke Date: Fri, 19 Jan 2024 19:35:02 +0100 Subject: [PATCH] StringTools: synced with MiniScript --- src/tdme/utilities/StringTools.cpp | 59 +++++++++++++++----------- src/tdme/utilities/StringTools.h | 11 ++--- src/tdme/utilities/UTF8StringTools.cpp | 18 +++++--- src/tdme/utilities/UTF8StringTools.h | 48 +++++++++++---------- 4 files changed, 78 insertions(+), 58 deletions(-) diff --git a/src/tdme/utilities/StringTools.cpp b/src/tdme/utilities/StringTools.cpp index dc095ff25..bd4a77fe0 100644 --- a/src/tdme/utilities/StringTools.cpp +++ b/src/tdme/utilities/StringTools.cpp @@ -9,6 +9,7 @@ #include #include #include +#include using std::find_if; using std::isspace; @@ -16,6 +17,7 @@ using std::regex; using std::regex_match; using std::regex_replace; using std::replace; +using std::smatch; using std::string; using std::string_view; using std::tolower; @@ -26,15 +28,16 @@ using tdme::utilities::StringTools; using tdme::utilities::Character; using tdme::utilities::StringTokenizer; +using tdme::utilities::UTF8CharacterIterator; -const string StringTools::replace(const string& src, const char what, const char by, int64_t beginIndex) { - string result = src; +const string StringTools::replace(const string& str, const char what, const char by, int64_t beginIndex) { + string result = str; std::replace(result.begin() + beginIndex, result.end(), what, by); return result; } -const string StringTools::replace(const string& src, const string& what, const string& by, int64_t beginIndex) { - string result = src; +const string StringTools::replace(const string& str, const string& what, const string& by, int64_t beginIndex) { + string result = str; if (what.empty()) return result; while ((beginIndex = result.find(what, beginIndex)) != std::string::npos) { result.replace(beginIndex, what.length(), by); @@ -51,8 +54,8 @@ bool StringTools::equalsIgnoreCase(const string& string1, const string& string2) return stringA == stringB; } -const string StringTools::trim(const string& src) { - string result = src; +const string StringTools::trim(const string& str) { + string result = str; result.erase( result.begin(), find_if( @@ -76,42 +79,48 @@ const string StringTools::trim(const string& src) { return result; } -const string_view StringTools::viewTrim(const string_view& src) { +const string_view StringTools::viewTrim(const string_view& str) { int64_t start = 0; - for (int64_t i = 0; i < src.size(); i++) { - if (isspace(src[i]) != 0) start++; else break; + for (int64_t i = 0; i < str.size(); i++) { + if (isspace(str[i]) != 0) start++; else break; } int64_t end = 0; - for (int64_t i = src.size() - 1; i >= 0; i--) { - if (isspace(src[i]) != 0) end++; else break; + for (int64_t i = str.size() - 1; i >= 0; i--) { + if (isspace(str[i]) != 0) end++; else break; } - return string_view(&src[start], src.size() - start - end); + return string_view(&str[start], str.size() - start - end); } -const string StringTools::toLowerCase(const string& src) { - string result = src; +const string StringTools::toLowerCase(const string& str) { + string result = str; transform(result.begin(), result.end(), result.begin(), (int(*)(int))tolower); return result; } -const string StringTools::toUpperCase(const string& src) { - string result = src; +const string StringTools::toUpperCase(const string& str) { + string result = str; transform(result.begin(), result.end(), result.begin(), (int(*)(int))toupper); return result; } -bool StringTools::regexMatch(const string& src, const string& pattern) { - // TODO: return found groups - return regex_match(src, regex(pattern, std::regex::ECMAScript)); +bool StringTools::regexMatch(const string& str, const string& pattern, smatch* matches) { + if (matches == nullptr) { + return regex_match(str, regex(pattern, std::regex::ECMAScript)); + } else { + return regex_match(str, *matches, regex(pattern, std::regex::ECMAScript)); + } } -bool StringTools::regexSearch(const string& src, const string& pattern) { - // TODO: return found groups - return regex_search(src, regex(pattern, std::regex::ECMAScript)); +bool StringTools::regexSearch(const string& str, const string& pattern, smatch* matches) { + if (matches == nullptr) { + return regex_search(str, regex(pattern, std::regex::ECMAScript)); + } else { + return regex_search(str, *matches, regex(pattern, std::regex::ECMAScript)); + } } -const string StringTools::regexReplace(const string& src, const string& pattern, const string& by) { - return regex_replace(src, regex(pattern, std::regex::ECMAScript), by); +const string StringTools::regexReplace(const string& str, const string& pattern, const string& by) { + return regex_replace(str, regex(pattern, std::regex::ECMAScript), by); } const vector StringTools::tokenize(const string& str, const string& delimiters, bool emptyTokens) { @@ -131,7 +140,7 @@ const string StringTools::getUTF8CharAt(const string& str, int64_t index) { UTF8CharacterIterator u8It(str); u8It.seekCharacterPosition(index); // - return u8It.hasNext() == true?Character::toString(u8It.next()):string(); + return u8It.hasNext() == true?::Character::toString(u8It.next()):string(); } int64_t StringTools::getUTF8BinaryIndex(const string& str, int64_t charIdx) { diff --git a/src/tdme/utilities/StringTools.h b/src/tdme/utilities/StringTools.h index f44db24af..31ff630d4 100644 --- a/src/tdme/utilities/StringTools.h +++ b/src/tdme/utilities/StringTools.h @@ -1,19 +1,18 @@ #pragma once +#include #include #include #include #include #include -#include +using std::smatch; using std::string; using std::string_view; using std::vector; -using tdme::utilities::UTF8CharacterIterator; - /** * String tools class * @author Andreas Drewke @@ -277,17 +276,19 @@ class tdme::utilities::StringTools final * Check if pattern matches whole string * @param str string * @param pattern pattern + * @param matches matches * @return if pattern matches whole string */ - static bool regexMatch(const string& str, const string& pattern); + static bool regexMatch(const string& str, const string& pattern, smatch* matches = nullptr); /** * Do regex pattern search * @param str string * @param pattern pattern + * @param matches matches * @return if search was successful */ - static bool regexSearch(const string& str, const string& pattern); + static bool regexSearch(const string& str, const string& pattern, smatch* matches = nullptr); /** * Replace regex pattern with given string diff --git a/src/tdme/utilities/UTF8StringTools.cpp b/src/tdme/utilities/UTF8StringTools.cpp index a37f90d37..393bcc9e9 100644 --- a/src/tdme/utilities/UTF8StringTools.cpp +++ b/src/tdme/utilities/UTF8StringTools.cpp @@ -221,14 +221,20 @@ const string UTF8StringTools::toUpperCase(const string& str, UTF8CharacterIterat return result; } -bool UTF8StringTools::regexMatch(const string& str, const string& pattern) { - // TODO: return found groups - return regex_match(str, regex(pattern, std::regex::ECMAScript)); +bool UTF8StringTools::regexMatch(const string& str, const string& pattern, smatch* matches) { + if (matches == nullptr) { + return regex_match(str, regex(pattern, std::regex::ECMAScript)); + } else { + return regex_match(str, *matches, regex(pattern, std::regex::ECMAScript)); + } } -bool UTF8StringTools::regexSearch(const string& str, const string& pattern) { - // TODO: return found groups - return regex_search(str, regex(pattern, std::regex::ECMAScript)); +bool UTF8StringTools::regexSearch(const string& str, const string& pattern, smatch* matches) { + if (matches == nullptr) { + return regex_search(str, regex(pattern, std::regex::ECMAScript)); + } else { + return regex_search(str, *matches, regex(pattern, std::regex::ECMAScript)); + } } const string UTF8StringTools::regexReplace(const string& str, const string& pattern, const string& by) { diff --git a/src/tdme/utilities/UTF8StringTools.h b/src/tdme/utilities/UTF8StringTools.h index 6b27b0745..4ccaf875e 100644 --- a/src/tdme/utilities/UTF8StringTools.h +++ b/src/tdme/utilities/UTF8StringTools.h @@ -1,5 +1,5 @@ #pragma once - +#include #include #include #include @@ -8,10 +8,12 @@ #include #include +using std::smatch; using std::string; using std::string_view; using std::vector; +using tdme::utilities::UTF8CharacterIterator; /** * UTF8 String tools class * @author Andreas Drewke @@ -77,7 +79,7 @@ class tdme::utilities::UTF8StringTools final const string& what, const string& by, int64_t beginIndex = 0, - ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr + UTF8CharacterIterator::UTF8PositionCache* cache = nullptr ); /** @@ -92,7 +94,7 @@ class tdme::utilities::UTF8StringTools final const string& str, const string& what, int64_t beginIndex = 0, - ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr + UTF8CharacterIterator::UTF8PositionCache* cache = nullptr ) { return firstIndexOf(str, what, beginIndex, cache); } @@ -109,7 +111,7 @@ class tdme::utilities::UTF8StringTools final const string& str, const string& what, int64_t beginIndex = 0, - ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr + UTF8CharacterIterator::UTF8PositionCache* cache = nullptr ); /** @@ -124,7 +126,7 @@ class tdme::utilities::UTF8StringTools final const string& str, const string& what, int64_t endIndex = string::npos, - ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr + UTF8CharacterIterator::UTF8PositionCache* cache = nullptr ); /** @@ -136,7 +138,7 @@ class tdme::utilities::UTF8StringTools final * @param whatCache what UTF8 position cache * @return index or string::npos if not found */ - static int64_t firstIndexOfChars(const string& str, const string& what, int64_t beginIndex = 0, ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* srcCache = nullptr, ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* whatCache = nullptr); + static int64_t firstIndexOfChars(const string& str, const string& what, int64_t beginIndex = 0, UTF8CharacterIterator::UTF8PositionCache* srcCache = nullptr, UTF8CharacterIterator::UTF8PositionCache* whatCache = nullptr); /** * Finds last index of characters provided within given string @@ -147,7 +149,7 @@ class tdme::utilities::UTF8StringTools final * @param whatCache what UTF8 position cache * @return index or string::npos if not found */ - static int64_t lastIndexOfChars(const string& str, const string& what, int64_t endIndex = string::npos, ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* srcCache = nullptr, ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* whatCache = nullptr); + static int64_t lastIndexOfChars(const string& str, const string& what, int64_t endIndex = string::npos, UTF8CharacterIterator::UTF8PositionCache* srcCache = nullptr, UTF8CharacterIterator::UTF8PositionCache* whatCache = nullptr); /** * Returns substring of given string from begin index to end index @@ -157,7 +159,7 @@ class tdme::utilities::UTF8StringTools final * @param cache str UTF8 position cache * @return substring result */ - inline static const string substring(const string& str, int64_t beginIndex, int64_t endIndex = string::npos, ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* srcCache = nullptr, ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr) { + inline static const string substring(const string& str, int64_t beginIndex, int64_t endIndex = string::npos, UTF8CharacterIterator::UTF8PositionCache* srcCache = nullptr, UTF8CharacterIterator::UTF8PositionCache* cache = nullptr) { auto result = viewSubstring(string_view(str), beginIndex, endIndex, cache); return string(result.data(), result.size()); } @@ -170,7 +172,7 @@ class tdme::utilities::UTF8StringTools final * @param cache str UTF8 position cache * @return substring result */ - static const string_view viewSubstring(const string_view& str, int64_t beginIndex, int64_t endIndex, ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); + static const string_view viewSubstring(const string_view& str, int64_t beginIndex, int64_t endIndex, UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); /** * Checks if strings equal ignoring case @@ -183,8 +185,8 @@ class tdme::utilities::UTF8StringTools final static bool equalsIgnoreCase( const string& string1, const string& string2, - ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* string1Cache = nullptr, - ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* string2Cache = nullptr + UTF8CharacterIterator::UTF8PositionCache* string1Cache = nullptr, + UTF8CharacterIterator::UTF8PositionCache* string2Cache = nullptr ); /** @@ -193,7 +195,7 @@ class tdme::utilities::UTF8StringTools final * @param cache UTF8 position cache * @return trimmed string */ - static const string trim(const string& str, ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); + static const string trim(const string& str, UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); /** * Trim string @@ -201,7 +203,7 @@ class tdme::utilities::UTF8StringTools final * @param cache UTF8 position cache * @return trimmed string */ - static const string_view viewTrim(const string_view& str, ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); + static const string_view viewTrim(const string_view& str, UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); /** * Transform string to lower case @@ -209,7 +211,7 @@ class tdme::utilities::UTF8StringTools final * @param cache UTF8 position cache * @return lowercase string */ - static const string toLowerCase(const string& str, ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); + static const string toLowerCase(const string& str, UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); /** * Transform string to upper case @@ -217,23 +219,25 @@ class tdme::utilities::UTF8StringTools final * @param cache UTF8 position cache * @return uppercase string */ - static const string toUpperCase(const string& str, ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); + static const string toUpperCase(const string& str, UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); /** * Check if pattern matches whole string * @param str string * @param pattern pattern + * @param matches matches * @return if pattern matches whole string */ - static bool regexMatch(const string& str, const string& pattern); + static bool regexMatch(const string& str, const string& pattern, smatch* matches = nullptr); /** * Do regex pattern search * @param str string * @param pattern pattern + * @param matches matches * @return if search was successful */ - static bool regexSearch(const string& str, const string& pattern); + static bool regexSearch(const string& str, const string& pattern, smatch* matches = nullptr); /** * Replace regex pattern with given string @@ -261,7 +265,7 @@ class tdme::utilities::UTF8StringTools final * @param cache str UTF8 position cache * @return padded string */ - static const string padLeft(const string& str, const string& by, int64_t toLength, ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); + static const string padLeft(const string& str, const string& by, int64_t toLength, UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); /** * Pad a string right @@ -271,7 +275,7 @@ class tdme::utilities::UTF8StringTools final * @param cache str UTF8 position cache * @return padded string */ - static const string padRight(const string& str, const string& by, int64_t toLength, ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); + static const string padRight(const string& str, const string& by, int64_t toLength, UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); /** * Indent a string @@ -304,7 +308,7 @@ class tdme::utilities::UTF8StringTools final * @param cache UTF8 position cache * @return UTF8 string length */ - static int64_t getLength(const string& str, ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); + static int64_t getLength(const string& str, UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); /** * Get UTF8 character at given index @@ -312,7 +316,7 @@ class tdme::utilities::UTF8StringTools final * @param index index * @param cache UTF8 position cache */ - static const string getCharAt(const string& str, int64_t index, ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); + static const string getCharAt(const string& str, int64_t index, UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); /** * Get UTF8 binary buffer index @@ -321,7 +325,7 @@ class tdme::utilities::UTF8StringTools final * @param cache UTF8 position cache * @return UTF binary buffer position from given character/code point index */ - static int64_t getUTF8BinaryIndex(const string& str, int64_t charIdx, ::tdme::utilities::UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); + static int64_t getUTF8BinaryIndex(const string& str, int64_t charIdx, UTF8CharacterIterator::UTF8PositionCache* cache = nullptr); };