From f1f57bc9f8c8ac918471c9724f7a79ffe3cf49d9 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Thu, 2 Mar 2023 17:13:19 +0100 Subject: [PATCH 1/5] Add cctypes safe wrappers --- libmamba/include/mamba/core/util_string.hpp | 27 ++++- libmamba/src/core/util_string.cpp | 116 +++++++++++++++++++- 2 files changed, 137 insertions(+), 6 deletions(-) diff --git a/libmamba/include/mamba/core/util_string.hpp b/libmamba/include/mamba/core/util_string.hpp index 903e58913a..8f86aec7e2 100644 --- a/libmamba/include/mamba/core/util_string.hpp +++ b/libmamba/include/mamba/core/util_string.hpp @@ -10,11 +10,12 @@ #include #include #include -#include +#include #include #include #include #include +#include #include #include "mamba/core/util_compare.hpp" @@ -26,8 +27,32 @@ namespace mamba */ const char* raw_str_or_empty(const char* ptr); + /** + * Safe non utf-8 wrapping of <cctype> (see its doc). + */ + char is_control(char c); + wchar_t is_control(wchar_t c); + char is_print(char c); + wchar_t is_print(wchar_t c); + char is_space(char c); + wchar_t is_space(wchar_t c); + char is_blank(char c); + wchar_t is_blank(wchar_t c); + char is_graphic(char c); + wchar_t is_graphic(wchar_t c); + char is_digit(char c); + wchar_t is_digit(wchar_t c); + char is_alpha(char c); + wchar_t is_alpha(wchar_t c); + char is_alphanum(char c); + wchar_t is_alphanum(wchar_t c); + char is_lower(char c); + wchar_t is_lower(wchar_t c); + char is_upper(char c); + wchar_t is_upper(wchar_t c); char to_lower(char c); wchar_t to_lower(wchar_t c); + std::string to_lower(std::string_view str); std::wstring to_lower(std::wstring_view str); // The use of a template here serves to exclude the overload for const Char* diff --git a/libmamba/src/core/util_string.cpp b/libmamba/src/core/util_string.cpp index e730b1637f..57cea9f1ee 100644 --- a/libmamba/src/core/util_string.cpp +++ b/libmamba/src/core/util_string.cpp @@ -7,15 +7,117 @@ #include "mamba/core/util_string.hpp" #include +#include #include -#include -#include +#include + 
+#include namespace mamba { - /*************************************************** - * Implementation of to_lower to_upper functions * - ***************************************************/ + /**************************************** + * Implementation of cctype functions * + ****************************************/ + + char is_control(char c) + { + return std::iscntrl(static_cast(c)) != 0; + } + + wchar_t is_control(wchar_t c) + { + return std::iswcntrl(static_cast(c)) != 0; + } + + char is_print(char c) + { + return std::isprint(static_cast(c)) != 0; + } + + wchar_t is_print(wchar_t c) + { + return std::iswprint(static_cast(c)) != 0; + } + + char is_space(char c) + { + return std::isspace(static_cast(c)) != 0; + } + + wchar_t is_space(wchar_t c) + { + return std::iswspace(static_cast(c)) != 0; + } + + char is_blank(char c) + { + return std::isblank(static_cast(c)) != 0; + } + + wchar_t is_blank(wchar_t c) + { + return std::iswblank(static_cast(c)) != 0; + } + + char is_graphic(char c) + { + return std::isgraph(static_cast(c)) != 0; + } + + wchar_t is_graphic(wchar_t c) + { + return std::iswgraph(static_cast(c)) != 0; + } + + char is_digit(char c) + { + return std::isdigit(static_cast(c)) != 0; + } + + wchar_t is_digit(wchar_t c) + { + return std::iswdigit(static_cast(c)) != 0; + } + + char is_alpha(char c) + { + return std::isalpha(static_cast(c)) != 0; + } + + wchar_t is_alpha(wchar_t c) + { + return std::iswalpha(static_cast(c)) != 0; + } + + char is_alphanum(char c) + { + return std::isalnum(static_cast(c)) != 0; + } + + wchar_t is_alphanum(wchar_t c) + { + return std::iswalnum(static_cast(c)) != 0; + } + + char is_lower(char c) + { + return std::islower(static_cast(c)) != 0; + } + + wchar_t is_lower(wchar_t c) + { + return std::iswlower(static_cast(c)) != 0; + } + + char is_upper(char c) + { + return std::isupper(static_cast(c)) != 0; + } + + wchar_t is_upper(wchar_t c) + { + return std::iswupper(static_cast(c)) != 0; + } char to_lower(char c) { @@ -27,6 
+129,10 @@ namespace mamba return static_cast(std::tolower(static_cast(c))); } + /*************************************************** + * Implementation of to_lower to_upper functions * + ***************************************************/ + namespace { template From fc2ec4481cedc90f3c24830d858543a23f34ed07 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Thu, 2 Mar 2023 17:13:46 +0100 Subject: [PATCH 2/5] Add strip_if functions --- libmamba/include/mamba/core/util_string.hpp | 87 +++++++++++++++++++++ libmamba/src/core/util_string.cpp | 21 ++--- libmamba/tests/test_util_string.cpp | 63 ++++++++++++--- 3 files changed, 150 insertions(+), 21 deletions(-) diff --git a/libmamba/include/mamba/core/util_string.hpp b/libmamba/include/mamba/core/util_string.hpp index 8f86aec7e2..3b3ed7e7eb 100644 --- a/libmamba/include/mamba/core/util_string.hpp +++ b/libmamba/include/mamba/core/util_string.hpp @@ -100,6 +100,11 @@ namespace mamba std::string_view lstrip(std::string_view input); std::wstring_view lstrip(std::wstring_view input); + template + std::string_view lstrip_if(std::string_view input, UnaryFunc should_strip); + template + std::wstring_view lstrip_if(std::wstring_view input, UnaryFunc should_strip); + std::string_view rstrip(std::string_view input, char c); std::wstring_view rstrip(std::wstring_view input, wchar_t c); std::string_view rstrip(std::string_view input, std::string_view chars); @@ -107,6 +112,11 @@ namespace mamba std::string_view rstrip(std::string_view input); std::wstring_view rstrip(std::wstring_view input); + template + std::string_view rstrip_if(std::string_view input, UnaryFunc should_strip); + template + std::wstring_view rstrip_if(std::wstring_view input, UnaryFunc should_strip); + std::string_view strip(std::string_view input, char c); std::wstring_view strip(std::wstring_view input, wchar_t c); std::string_view strip(std::string_view input, std::string_view chars); @@ -114,6 +124,11 @@ namespace mamba std::string_view strip(std::string_view 
input); std::wstring_view strip(std::wstring_view input); + template + std::string_view strip_if(std::string_view input, UnaryFunc should_strip); + template + std::wstring_view strip_if(std::wstring_view input, UnaryFunc should_strip); + std::vector split(std::string_view input, std::string_view sep, std::size_t max_split = SIZE_MAX); std::vector @@ -276,6 +291,78 @@ namespace mamba extern template bool starts_with_any(std::string_view, const std::vector&); extern template bool starts_with_any(std::string_view, const std::vector&); + /*************************************** + * Implementation of strip functions * + ***************************************/ + + namespace detail + { + template + std::basic_string_view + lstrip_if_impl(std::basic_string_view input, UnaryFunc should_strip) + { + const auto start_iter = std::find_if( + input.cbegin(), + input.cend(), + [&should_strip](Char c) -> bool { return !should_strip(c); } + ); + const auto start_idx = static_cast(start_iter - input.cbegin()); + return input.substr(start_idx); + } + } + + template + std::string_view lstrip_if(std::string_view input, UnaryFunc should_strip) + { + return detail::lstrip_if_impl(input, std::move(should_strip)); + } + + template + std::wstring_view lstrip_if(std::wstring_view input, UnaryFunc should_strip) + { + return detail::lstrip_if_impl(input, std::move(should_strip)); + } + + namespace detail + { + template + std::basic_string_view + rstrip_if_impl(std::basic_string_view input, UnaryFunc should_strip) + { + const auto rstart_iter = std::find_if( + input.crbegin(), + input.crend(), + [&should_strip](Char c) -> bool { return !should_strip(c); } + ); + const auto past_end_idx = static_cast(input.crend() - rstart_iter); + return input.substr(0, past_end_idx); + } + } + + template + std::string_view rstrip_if(std::string_view input, UnaryFunc should_strip) + { + return detail::rstrip_if_impl(input, std::move(should_strip)); + } + + template + std::wstring_view 
rstrip_if(std::wstring_view input, UnaryFunc should_strip) + { + return detail::rstrip_if_impl(input, std::move(should_strip)); + } + + template + std::string_view strip_if(std::string_view input, UnaryFunc should_strip) + { + return rstrip_if(lstrip_if(input, should_strip), should_strip); + } + + template + std::wstring_view strip_if(std::wstring_view input, UnaryFunc should_strip) + { + return rstrip_if(lstrip_if(input, should_strip), should_strip); + } + /************************************** * Implementation of join functions * **************************************/ diff --git a/libmamba/src/core/util_string.cpp b/libmamba/src/core/util_string.cpp index 57cea9f1ee..c88ae31a2a 100644 --- a/libmamba/src/core/util_string.cpp +++ b/libmamba/src/core/util_string.cpp @@ -254,9 +254,6 @@ namespace mamba namespace { - constexpr const char* WHITESPACES(" \r\n\t\f\v"); - constexpr const wchar_t* WHITESPACES_WSTR(L" \r\n\t\f\v"); - template std::basic_string_view lstrip_impl(std::basic_string_view input, CharOrStrView chars) @@ -285,11 +282,13 @@ namespace mamba } std::string_view lstrip(std::string_view input) { - return lstrip(input, WHITESPACES); + using Char = decltype(input)::value_type; + return lstrip_if(input, [](Char c) { return !is_graphic(c); }); } std::wstring_view lstrip(std::wstring_view input) { - return lstrip(input, WHITESPACES_WSTR); + using Char = decltype(input)::value_type; + return lstrip_if(input, [](Char c) { return !is_graphic(c); }); } namespace @@ -322,11 +321,13 @@ namespace mamba } std::string_view rstrip(std::string_view input) { - return rstrip(input, WHITESPACES); + using Char = decltype(input)::value_type; + return rstrip_if(input, [](Char c) { return !is_graphic(c); }); } std::wstring_view rstrip(std::wstring_view input) { - return rstrip(input, WHITESPACES_WSTR); + using Char = decltype(input)::value_type; + return rstrip_if(input, [](Char c) { return !is_graphic(c); }); } namespace @@ -364,11 +365,13 @@ namespace mamba } 
std::string_view strip(std::string_view input) { - return strip(input, WHITESPACES); + using Char = decltype(input)::value_type; + return strip_if(input, [](Char c) { return !is_graphic(c); }); } std::wstring_view strip(std::wstring_view input) { - return strip(input, WHITESPACES_WSTR); + using Char = decltype(input)::value_type; + return strip_if(input, [](Char c) { return !is_graphic(c); }); } /*************************************** diff --git a/libmamba/tests/test_util_string.cpp b/libmamba/tests/test_util_string.cpp index 37353ee34c..49c31c4ba8 100644 --- a/libmamba/tests/test_util_string.cpp +++ b/libmamba/tests/test_util_string.cpp @@ -4,6 +4,9 @@ // // The full license is in the file LICENSE, distributed with this software. +#include +#include +#include #include #include @@ -80,28 +83,37 @@ namespace mamba EXPECT_TRUE(starts_with_any("áäᜩgþhëb®hüghœ©®xb", StrVec{ "áäᜩgþhëb", "®hüghœ©®xb" })); } - TEST(util_string, strip) - { - EXPECT_EQ(strip(" hello \t\n"), "hello"); - EXPECT_EQ(strip(":::hello%:%", ":%"), "hello"); - EXPECT_EQ(strip(":::hello%:%", ":"), "hello%:%"); - EXPECT_EQ(strip(":::hello%:%", ":"), "hello%:%"); - } - TEST(util_string, lstrip) { EXPECT_EQ(lstrip("\n \thello \t\n"), "hello \t\n"); EXPECT_EQ(lstrip(":::hello%:%", ":%"), "hello%:%"); - EXPECT_EQ(lstrip(":::hello%:%", ":"), "hello%:%"); - EXPECT_EQ(lstrip(":::hello%:%", "%"), ":::hello%:%"); + EXPECT_EQ(lstrip(":::hello%:%", ':'), "hello%:%"); + EXPECT_EQ(lstrip(":::hello%:%", '%'), ":::hello%:%"); + EXPECT_EQ(lstrip("", '%'), ""); + EXPECT_EQ(lstrip("aaa", 'a'), ""); + EXPECT_EQ(lstrip("aaa", 'b'), "aaa"); } TEST(util_string, rstrip) { EXPECT_EQ(rstrip("\n \thello \t\n"), "\n \thello"); - EXPECT_EQ(rstrip(":::hello%:%", "%"), ":::hello%:"); + EXPECT_EQ(rstrip(":::hello%:%", '%'), ":::hello%:"); EXPECT_EQ(rstrip(":::hello%:%", ":%"), ":::hello"); - EXPECT_EQ(rstrip(":::hello%:%", ":"), ":::hello%:%"); + EXPECT_EQ(rstrip(":::hello%:%", ':'), ":::hello%:%"); + EXPECT_EQ(rstrip("", '%'), 
""); + EXPECT_EQ(rstrip("aaa", 'a'), ""); + EXPECT_EQ(rstrip("aaa", 'b'), "aaa"); + } + + TEST(util_string, strip) + { + EXPECT_EQ(strip(" hello \t\n"), "hello"); + EXPECT_EQ(strip(":::hello%:%", ":%"), "hello"); + EXPECT_EQ(strip(":::hello%:%", ':'), "hello%:%"); + EXPECT_EQ(strip(":::hello%:%", ':'), "hello%:%"); + EXPECT_EQ(strip("", '%'), ""); + EXPECT_EQ(strip("aaa", 'a'), ""); + EXPECT_EQ(strip("aaa", 'b'), "aaa"); } TEST(utils, strip_whitespaces) @@ -164,6 +176,33 @@ namespace mamba } } + TEST(util_string, lstrip_if) + { + EXPECT_EQ(lstrip_if("", [](auto) { return true; }), ""); + EXPECT_EQ(lstrip_if("hello", [](auto) { return true; }), ""); + EXPECT_EQ(lstrip_if("hello", [](auto) { return false; }), "hello"); + EXPECT_EQ(lstrip_if("\n \thello \t\n", [](auto c) { return !is_alphanum(c); }), "hello \t\n"); + EXPECT_EQ(lstrip_if("123hello456", [](auto c) { return is_digit(c); }), "hello456"); + } + + TEST(util_string, rstrip_if) + { + EXPECT_EQ(rstrip_if("", [](auto) { return true; }), ""); + EXPECT_EQ(rstrip_if("hello", [](auto) { return true; }), ""); + EXPECT_EQ(rstrip_if("hello", [](auto) { return false; }), "hello"); + EXPECT_EQ(rstrip_if("\n \thello \t\n", [](auto c) { return !is_alphanum(c); }), "\n \thello"); + EXPECT_EQ(rstrip_if("123hello456", [](auto c) { return is_digit(c); }), "123hello"); + } + + TEST(util_string, strip_if) + { + EXPECT_EQ(strip_if("", [](auto) { return true; }), ""); + EXPECT_EQ(strip_if("hello", [](auto) { return true; }), ""); + EXPECT_EQ(strip_if("hello", [](auto) { return false; }), "hello"); + EXPECT_EQ(strip_if("\n \thello \t\n", [](auto c) { return !is_alphanum(c); }), "hello"); + EXPECT_EQ(strip_if("123hello456", [](auto c) { return is_digit(c); }), "hello"); + } + TEST(util_string, split) { std::string a = "hello.again.it's.me.mario"; From 55630e271ee6348a63b4a14571bf7eeb2a03e7f0 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 3 Mar 2023 10:05:14 +0100 Subject: [PATCH 3/5] C++20 comments --- 
libmamba/include/mamba/core/util_string.hpp | 2 +- libmamba/src/core/util_string.cpp | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/libmamba/include/mamba/core/util_string.hpp b/libmamba/include/mamba/core/util_string.hpp index 3b3ed7e7eb..7548a4fd5f 100644 --- a/libmamba/include/mamba/core/util_string.hpp +++ b/libmamba/include/mamba/core/util_string.hpp @@ -398,7 +398,7 @@ namespace mamba } } - // TODO(C++20) Take an input range and return a range + // TODO(C++20) This is a range template UnaryFunction join_for_each(InputIt first, InputIt last, UnaryFunction func, const Value& sep) { diff --git a/libmamba/src/core/util_string.cpp b/libmamba/src/core/util_string.cpp index c88ae31a2a..b80114ee2d 100644 --- a/libmamba/src/core/util_string.cpp +++ b/libmamba/src/core/util_string.cpp @@ -254,6 +254,8 @@ namespace mamba namespace { + // string_view has a different overload for ``find(char)`` and ``find(string_view)`` + // so we want to leverage that. template std::basic_string_view lstrip_impl(std::basic_string_view input, CharOrStrView chars) @@ -293,6 +295,8 @@ namespace mamba namespace { + // string_view has a different overload for ``find(char)`` and ``find(string_view)`` + // so we want to leverage that. template std::basic_string_view rstrip_impl(std::basic_string_view input, CharOrStrView chars) @@ -332,6 +336,8 @@ namespace mamba namespace { + // string_view has a different overload for ``find(char)`` and ``find(string_view)`` + // so we want to leverage that. 
template std::basic_string_view strip_impl(std::basic_string_view input, CharOrStrView chars) @@ -465,24 +471,28 @@ namespace mamba } } + // TODO(C++20) lazy_split_view is a range std::vector split(std::string_view input, std::string_view sep, std::size_t max_split) { return split(input, sep, max_split); } + // TODO(C++20) lazy_split_view is a range std::vector split(std::wstring_view input, std::wstring_view sep, std::size_t max_split) { return split(input, sep, max_split); } + // TODO(C++20) lazy_split_view is a range std::vector rsplit(std::string_view input, std::string_view sep, std::size_t max_split) { return rsplit(input, sep, max_split); } + // TODO(C++20) lazy_split_view is a range std::vector rsplit(std::wstring_view input, std::wstring_view sep, std::size_t max_split) { From fade6baafa86877e8b2fb82ffc1ba69d7f53f52b Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Mon, 6 Mar 2023 11:39:57 +0100 Subject: [PATCH 4/5] Add review from @Hind-M --- libmamba/include/mamba/core/util_string.hpp | 42 ++++++++++----------- libmamba/src/core/util_string.cpp | 40 ++++++++++---------- 2 files changed, 41 insertions(+), 41 deletions(-) diff --git a/libmamba/include/mamba/core/util_string.hpp b/libmamba/include/mamba/core/util_string.hpp index 7548a4fd5f..2ead68c3a0 100644 --- a/libmamba/include/mamba/core/util_string.hpp +++ b/libmamba/include/mamba/core/util_string.hpp @@ -30,26 +30,26 @@ namespace mamba /** * Safe non utf-8 wrapping of <cctype> (see its doc). 
*/ - char is_control(char c); - wchar_t is_control(wchar_t c); - char is_print(char c); - wchar_t is_print(wchar_t c); - char is_space(char c); - wchar_t is_space(wchar_t c); - char is_blank(char c); - wchar_t is_blank(wchar_t c); - char is_graphic(char c); - wchar_t is_graphic(wchar_t c); - char is_digit(char c); - wchar_t is_digit(wchar_t c); - char is_alpha(char c); - wchar_t is_alpha(wchar_t c); - char is_alphanum(char c); - wchar_t is_alphanum(wchar_t c); - char is_lower(char c); - wchar_t is_lower(wchar_t c); - char is_upper(char c); - wchar_t is_upper(wchar_t c); + bool is_control(char c); + bool is_control(wchar_t c); + bool is_print(char c); + bool is_print(wchar_t c); + bool is_space(char c); + bool is_space(wchar_t c); + bool is_blank(char c); + bool is_blank(wchar_t c); + bool is_graphic(char c); + bool is_graphic(wchar_t c); + bool is_digit(char c); + bool is_digit(wchar_t c); + bool is_alpha(char c); + bool is_alpha(wchar_t c); + bool is_alphanum(char c); + bool is_alphanum(wchar_t c); + bool is_lower(char c); + bool is_lower(wchar_t c); + bool is_upper(char c); + bool is_upper(wchar_t c); char to_lower(char c); wchar_t to_lower(wchar_t c); @@ -398,7 +398,7 @@ namespace mamba } } - // TODO(C++20) This is a range + // TODO(C++20) Use ``std::ranges::join_view`` (or ``std::ranges::join``) template UnaryFunction join_for_each(InputIt first, InputIt last, UnaryFunction func, const Value& sep) { diff --git a/libmamba/src/core/util_string.cpp b/libmamba/src/core/util_string.cpp index b80114ee2d..07caad5ca0 100644 --- a/libmamba/src/core/util_string.cpp +++ b/libmamba/src/core/util_string.cpp @@ -19,102 +19,102 @@ namespace mamba * Implementation of cctype functions * ****************************************/ - char is_control(char c) + bool is_control(char c) { return std::iscntrl(static_cast(c)) != 0; } - wchar_t is_control(wchar_t c) + bool is_control(wchar_t c) { return std::iswcntrl(static_cast(c)) != 0; } - char is_print(char c) + bool is_print(char c) 
{ return std::isprint(static_cast(c)) != 0; } - wchar_t is_print(wchar_t c) + bool is_print(wchar_t c) { return std::iswprint(static_cast(c)) != 0; } - char is_space(char c) + bool is_space(char c) { return std::isspace(static_cast(c)) != 0; } - wchar_t is_space(wchar_t c) + bool is_space(wchar_t c) { return std::iswspace(static_cast(c)) != 0; } - char is_blank(char c) + bool is_blank(char c) { return std::isblank(static_cast(c)) != 0; } - wchar_t is_blank(wchar_t c) + bool is_blank(wchar_t c) { return std::iswblank(static_cast(c)) != 0; } - char is_graphic(char c) + bool is_graphic(char c) { return std::isgraph(static_cast(c)) != 0; } - wchar_t is_graphic(wchar_t c) + bool is_graphic(wchar_t c) { return std::iswgraph(static_cast(c)) != 0; } - char is_digit(char c) + bool is_digit(char c) { return std::isdigit(static_cast(c)) != 0; } - wchar_t is_digit(wchar_t c) + bool is_digit(wchar_t c) { return std::iswdigit(static_cast(c)) != 0; } - char is_alpha(char c) + bool is_alpha(char c) { return std::isalpha(static_cast(c)) != 0; } - wchar_t is_alpha(wchar_t c) + bool is_alpha(wchar_t c) { return std::iswalpha(static_cast(c)) != 0; } - char is_alphanum(char c) + bool is_alphanum(char c) { return std::isalnum(static_cast(c)) != 0; } - wchar_t is_alphanum(wchar_t c) + bool is_alphanum(wchar_t c) { return std::iswalnum(static_cast(c)) != 0; } - char is_lower(char c) + bool is_lower(char c) { return std::islower(static_cast(c)) != 0; } - wchar_t is_lower(wchar_t c) + bool is_lower(wchar_t c) { return std::iswlower(static_cast(c)) != 0; } - char is_upper(char c) + bool is_upper(char c) { return std::isupper(static_cast(c)) != 0; } - wchar_t is_upper(wchar_t c) + bool is_upper(wchar_t c) { return std::iswupper(static_cast(c)) != 0; } From b161a8badd00098fae025c0afec993779e4e0573 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Wed, 8 Mar 2023 09:16:50 +0100 Subject: [PATCH 5/5] Update libmamba/src/core/util_string.cpp Co-authored-by: Antoine Prouvost --- 
libmamba/src/core/util_string.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libmamba/src/core/util_string.cpp b/libmamba/src/core/util_string.cpp index 07caad5ca0..6e04167434 100644 --- a/libmamba/src/core/util_string.cpp +++ b/libmamba/src/core/util_string.cpp @@ -126,7 +126,7 @@ namespace mamba wchar_t to_lower(wchar_t c) { - return static_cast(std::tolower(static_cast(c))); + return static_cast(std::towlower(static_cast(c))); } /***************************************************