diff --git a/CMakeLists.txt b/CMakeLists.txt index f47ad394..5f97f91d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,5 @@ # Copyright 2020, 2021 Peter Dimov +# Copyright 2023-2024 Alexander Grund # Distributed under the Boost Software License, Version 1.0. # https://www.boost.org/LICENSE_1_0.txt @@ -51,6 +52,7 @@ target_link_libraries(boost_locale Boost::iterator Boost::utility PRIVATE + Boost::charconv Boost::predef Boost::thread ) diff --git a/build/Jamfile.v2 b/build/Jamfile.v2 index 7889db6b..7b505343 100644 --- a/build/Jamfile.v2 +++ b/build/Jamfile.v2 @@ -1,6 +1,6 @@ # Copyright 2003 John Maddock # Copyright 2010 Artyom Beilis -# Copyright 2021 - 2022 Alexander Grund +# Copyright 2021 - 2024 Alexander Grund # # Distributed under the Boost Software License, Version 1.0. # https://www.boost.org/LICENSE_1_0.txt. @@ -16,6 +16,7 @@ import toolset ; path-constant TOP : .. ; constant boost_dependencies_private : + /boost/charconv//boost_charconv /boost/predef//boost_predef /boost/thread//boost_thread ; diff --git a/doc/changelog.txt b/doc/changelog.txt index 3e4fe789..1809777c 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -10,6 +10,7 @@ - 1.88.0 - Fix parsing of numbers in floating point format to integers + - Require ICU 4.2 or later - 1.86.0 - Make ICU implementation of `to_title` threadsafe - Add allocator support to `utf_to_utf` diff --git a/src/boost/locale/icu/collator.cpp b/src/boost/locale/icu/collator.cpp index 72ade74f..f5977242 100644 --- a/src/boost/locale/icu/collator.cpp +++ b/src/boost/locale/icu/collator.cpp @@ -16,13 +16,8 @@ #include #include #include +#include #include -#if BOOST_LOCALE_ICU_VERSION >= 402 -# define BOOST_LOCALE_WITH_STRINGPIECE 1 -# include -#else -# define BOOST_LOCALE_WITH_STRINGPIECE 0 -#endif #ifdef BOOST_MSVC # pragma warning(disable : 4244) // 'argument' : conversion from 'int' @@ -43,7 +38,6 @@ namespace boost { namespace locale { namespace impl_icu { return res; } -#if BOOST_LOCALE_WITH_STRINGPIECE int do_utf8_compare(collate_level level, const char* b1, const char* e1, @@ -55,7 +49,6 @@ namespace boost { namespace locale { namespace impl_icu { icu::StringPiece right(b2, e2 - b2); return get_collator(level).compareUTF8(left, right, status); } -#endif int do_ustring_compare(collate_level level, const CharType* b1, @@ -159,7 +152,6 @@ namespace boost { namespace locale { namespace impl_icu { bool is_utf8_; }; -#if BOOST_LOCALE_WITH_STRINGPIECE template<> int collate_impl::do_real_compare(collate_level level, const char* b1, @@ -173,7 +165,7 @@ namespace boost { namespace locale { namespace impl_icu { else return do_ustring_compare(level, b1, e1, b2, e2, status); } -#endif + std::locale create_collate(const std::locale& in, const cdata& cd, char_facet_t type) { switch(type) { diff --git a/src/boost/locale/icu/conversion.cpp b/src/boost/locale/icu/conversion.cpp index c619dd94..3a4acbbd 100644 --- a/src/boost/locale/icu/conversion.cpp +++ b/src/boost/locale/icu/conversion.cpp @@ -13,11 +13,8 @@ #include #include #include +#include #include -#if BOOST_LOCALE_ICU_VERSION >= 308 -# include -# define BOOST_LOCALE_WITH_CASEMAP -#endif #include namespace boost { namespace locale { namespace impl_icu { @@ -72,7 +69,6 @@ namespace boost { namespace locale { namespace impl_icu { std::string encoding_; }; // converter_impl -#ifdef BOOST_LOCALE_WITH_CASEMAP template struct get_casemap_size_type; @@ -193,26 +189,17 @@ namespace boost { namespace locale { namespace impl_icu { raii_casemap map_; }; // converter_impl -#endif // BOOST_LOCALE_WITH_CASEMAP - std::locale create_convert(const std::locale& in, const cdata& cd, char_facet_t type) { switch(type) { case char_facet_t::nochar: break; case char_facet_t::char_f: -#ifdef BOOST_LOCALE_WITH_CASEMAP if(cd.is_utf8()) return std::locale(in, new utf8_converter_impl(cd)); -#endif return std::locale(in, new converter_impl(cd)); case char_facet_t::wchar_f: return std::locale(in, new converter_impl(cd)); #ifndef BOOST_LOCALE_NO_CXX20_STRING8 - case char_facet_t::char8_f: -# if defined(BOOST_LOCALE_WITH_CASEMAP) - return std::locale(in, new utf8_converter_impl(cd)); -# else - return std::locale(in, new converter_impl(cd)); -# endif + case char_facet_t::char8_f: return std::locale(in, new utf8_converter_impl(cd)); #elif defined(__cpp_char8_t) case char_facet_t::char8_f: break; #endif diff --git a/src/boost/locale/icu/date_time.cpp b/src/boost/locale/icu/date_time.cpp index 14e21d9f..36e4ca30 100644 --- a/src/boost/locale/icu/date_time.cpp +++ b/src/boost/locale/icu/date_time.cpp @@ -67,10 +67,6 @@ namespace boost { namespace locale { namespace impl_icu { const double rounded_time = std::floor(calendar_->getTime(err) / U_MILLIS_PER_SECOND) * U_MILLIS_PER_SECOND; calendar_->setTime(rounded_time, err); check_and_throw_dt(err); -#if BOOST_LOCALE_ICU_VERSION < 402 - // workaround old/invalid data, it should be 4 in general - calendar_->setMinimalDaysInFirstWeek(4); -#endif encoding_ = dat.encoding(); } calendar_impl(const calendar_impl& other) @@ -79,15 +75,9 @@ namespace boost { namespace locale { namespace impl_icu { encoding_ = other.encoding_; } - calendar_impl* clone() const override - { - return new calendar_impl(*this); - } + calendar_impl* clone() const override { return new calendar_impl(*this); } - void set_value(period::marks::period_mark p, int value) override - { - calendar_->set(to_icu(p), int32_t(value)); - } + void set_value(period::marks::period_mark p, int value) override { calendar_->set(to_icu(p), int32_t(value)); } int get_value(period::marks::period_mark p, value_type type) const override { @@ -202,10 +192,7 @@ namespace boost { namespace locale { namespace impl_icu { check_and_throw_dt(err); return diff; } - void set_timezone(const std::string& tz) override - { - calendar_->adoptTimeZone(get_time_zone(tz)); - } + void set_timezone(const std::string& tz) override { calendar_->adoptTimeZone(get_time_zone(tz)); } std::string get_timezone() const override { icu::UnicodeString tz; diff --git a/src/boost/locale/icu/formatter.cpp b/src/boost/locale/icu/formatter.cpp index 955b3d18..355d8307 100644 --- a/src/boost/locale/icu/formatter.cpp +++ b/src/boost/locale/icu/formatter.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) -// Copyright (c) 2021-2023 Alexander Grund +// Copyright (c) 2021-2024 Alexander Grund // // Distributed under the Boost Software License, Version 1.0. // https://www.boost.org/LICENSE_1_0.txt @@ -13,8 +13,12 @@ #include "boost/locale/icu/time_zone.hpp" #include "boost/locale/icu/uconv.hpp" #include "boost/locale/util/foreach_char.hpp" +#include +#include +#include #include #include +#include #ifdef BOOST_MSVC # pragma warning(push) # pragma warning(disable : 4251) // "identifier" : class "type" needs to have dll-interface... @@ -62,35 +66,69 @@ namespace boost { namespace locale { namespace impl_icu { string_type format(int64_t value, size_t& code_points) const override { return do_format(value, code_points); } string_type format(int32_t value, size_t& code_points) const override { return do_format(value, code_points); } size_t parse(const string_type& str, double& value) const override { return do_parse(str, value); } + size_t parse(const string_type& str, uint64_t& value) const override { return do_parse(str, value); } size_t parse(const string_type& str, int64_t& value) const override { return do_parse(str, value); } size_t parse(const string_type& str, int32_t& value) const override { return do_parse(str, value); } + string_type format(const uint64_t value, size_t& code_points) const override + { + // ICU only supports int64_t as the largest integer type + if(value <= static_cast(std::numeric_limits::max())) + return format(static_cast(value), code_points); + + // Fallback to using a StringPiece (decimal number) as input + char buffer[std::numeric_limits::digits10 + 2]; + auto res = boost::charconv::to_chars(buffer, std::end(buffer), value); + BOOST_ASSERT(res); + *res.ptr = '\0'; // ICU expects a NULL-terminated string even for the StringPiece + icu::UnicodeString tmp; + UErrorCode err = U_ZERO_ERROR; + icu_fmt_.format(icu::StringPiece(buffer, res.ptr - buffer), tmp, nullptr, err); + check_and_throw_icu_error(err); + code_points = tmp.countChar32(); + return cvt_.std(tmp); + } + private: bool get_value(double& v, icu::Formattable& fmt) const { UErrorCode err = U_ZERO_ERROR; v = fmt.getDouble(err); - if(U_FAILURE(err)) - return false; - return true; + return U_SUCCESS(err); } bool get_value(int64_t& v, icu::Formattable& fmt) const { UErrorCode err = U_ZERO_ERROR; v = fmt.getInt64(err); + return U_SUCCESS(err); + } + + bool get_value(uint64_t& v, icu::Formattable& fmt) const + { + UErrorCode err = U_ZERO_ERROR; + // ICU only supports int64_t as the largest integer type + const int64_t tmp = fmt.getInt64(err); + if(U_SUCCESS(err)) { + if(tmp < 0) + return false; + v = static_cast(tmp); + return true; + } + // Get value as a decimal number and parse that + err = U_ZERO_ERROR; + const auto decimals = fmt.getDecimalNumber(err); if(U_FAILURE(err)) - return false; - return true; + return false; // Not a number + const auto res = boost::charconv::from_chars({decimals.data(), static_cast(decimals.length())}, v); + return static_cast(res); } bool get_value(int32_t& v, icu::Formattable& fmt) const { UErrorCode err = U_ZERO_ERROR; v = fmt.getLong(err); - if(U_FAILURE(err)) - return false; - return true; + return U_SUCCESS(err); } template @@ -114,14 +152,11 @@ namespace boost { namespace locale { namespace impl_icu { icu_fmt_.setParseIntegerOnly(std::is_integral::value && isNumberOnly_); icu_fmt_.parse(tmp, val, pp); - ValueType tmp_v; - - if(pp.getIndex() == 0 || !get_value(tmp_v, val)) + if(pp.getIndex() == 0 || !get_value(v, val)) return 0; size_t cut = cvt_.cut(tmp, str.data(), str.data() + str.size(), pp.getIndex()); if(cut == 0) return 0; - v = tmp_v; return cut; } @@ -136,11 +171,11 @@ namespace boost { namespace locale { namespace impl_icu { typedef std::basic_string string_type; string_type format(double value, size_t& code_points) const override { return do_format(value, code_points); } + string_type format(uint64_t value, size_t& code_points) const override { return do_format(value, code_points); } string_type format(int64_t value, size_t& code_points) const override { return do_format(value, code_points); } - string_type format(int32_t value, size_t& code_points) const override { return do_format(value, code_points); } - size_t parse(const string_type& str, double& value) const override { return do_parse(str, value); } + size_t parse(const string_type& str, uint64_t& value) const override { return do_parse(str, value); } size_t parse(const string_type& str, int64_t& value) const override { return do_parse(str, value); } size_t parse(const string_type& str, int32_t& value) const override { return do_parse(str, value); } diff --git a/src/boost/locale/icu/formatter.hpp b/src/boost/locale/icu/formatter.hpp index 110a0315..bcda2788 100644 --- a/src/boost/locale/icu/formatter.hpp +++ b/src/boost/locale/icu/formatter.hpp @@ -1,5 +1,6 @@ // // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) +// Copyright (c) 2024 Alexander Grund // // Distributed under the Boost Software License, Version 1.0. // https://www.boost.org/LICENSE_1_0.txt @@ -31,6 +32,8 @@ namespace boost { namespace locale { namespace impl_icu { /// Format the value and return the number of Unicode code points virtual string_type format(double value, size_t& code_points) const = 0; /// Format the value and return the number of Unicode code points + virtual string_type format(uint64_t value, size_t& code_points) const = 0; + /// Format the value and return the number of Unicode code points virtual string_type format(int64_t value, size_t& code_points) const = 0; /// Format the value and return the number of Unicode code points virtual string_type format(int32_t value, size_t& code_points) const = 0; @@ -40,6 +43,9 @@ namespace boost { namespace locale { namespace impl_icu { virtual size_t parse(const string_type& str, double& value) const = 0; /// Parse the string and return the number of used characters. If it returns 0 /// then parsing failed. + virtual size_t parse(const string_type& str, uint64_t& value) const = 0; + /// Parse the string and return the number of used characters. If it returns 0 + /// then parsing failed. virtual size_t parse(const string_type& str, int64_t& value) const = 0; /// Parse the string and return the number of used characters. If it returns 0 /// then parsing failed. diff --git a/src/boost/locale/icu/formatters_cache.cpp b/src/boost/locale/icu/formatters_cache.cpp index c9a8c463..64ea9c8e 100644 --- a/src/boost/locale/icu/formatters_cache.cpp +++ b/src/boost/locale/icu/formatters_cache.cpp @@ -92,16 +92,13 @@ namespace boost { namespace locale { namespace impl_icu { case num_fmt_type::curr_iso: return icu::NumberFormat::createInstance(locale_, UNUM_CURRENCY_ISO, err); break; -#elif BOOST_LOCALE_ICU_VERSION >= 402 +#else case num_fmt_type::curr_nat: return icu::NumberFormat::createInstance(locale_, icu::NumberFormat::kCurrencyStyle, err); break; case num_fmt_type::curr_iso: return icu::NumberFormat::createInstance(locale_, icu::NumberFormat::kIsoCurrencyStyle, err); break; -#else - case num_fmt_type::curr_nat: - case num_fmt_type::curr_iso: return icu::NumberFormat::createCurrencyInstance(locale_, err); break; #endif case num_fmt_type::percent: return icu::NumberFormat::createPercentInstance(locale_, err); break; case num_fmt_type::spell: return new icu::RuleBasedNumberFormat(icu::URBNF_SPELLOUT, locale_, err); break; diff --git a/src/boost/locale/icu/numeric.cpp b/src/boost/locale/icu/numeric.cpp index 9002a233..986e6a2a 100644 --- a/src/boost/locale/icu/numeric.cpp +++ b/src/boost/locale/icu/numeric.cpp @@ -1,5 +1,6 @@ // // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) +// Copyright (c) 2024 Alexander Grund // // Distributed under the Boost Software License, Version 1.0. // https://www.boost.org/LICENSE_1_0.txt @@ -19,41 +20,31 @@ namespace boost { namespace locale { namespace impl_icu { namespace detail { - template::is_integer> - struct icu_format_type; + template + struct choose_type_by_digits + : std::conditional::digits <= std::numeric_limits::digits, + PreferredType, + AlternativeType> {}; - template - struct icu_format_type { - // ICU supports 32 and 64 bit ints, use the former as long as it fits, else the latter - typedef typename std::conditional::digits <= 31, int32_t, int64_t>::type type; + template::is_integer> + struct icu_format_type { + static_assert(sizeof(T) <= sizeof(int64_t), "Only up to 64 bit integer types are supported by ICU"); + // ICU supports (only) int32_t and int64_t, use the former as long as it fits, else the latter + using large_type = typename choose_type_by_digits::type; + using type = typename choose_type_by_digits::type; }; template struct icu_format_type { // Only float type ICU supports is double - typedef double type; - }; - - // ICU does not support uint64_t values so fall back to the parent/std formatting - // if the number is to large to fit into an int64_t - template::is_signed && std::numeric_limits::is_integer - && (sizeof(T) >= sizeof(uint64_t))> - struct use_parent_traits { - static bool use(T /*v*/) { return false; } - }; - template - struct use_parent_traits { - static bool use(T v) { return v > static_cast(std::numeric_limits::max()); } + using type = double; }; template - static bool use_parent(std::ios_base& ios, ValueType v) + static bool use_parent(std::ios_base& ios) { const uint64_t flg = ios_info::get(ios).display_flags(); if(flg == flags::posix) return true; - if(use_parent_traits::use(v)) - return true; if(!std::numeric_limits::is_integer) return false; @@ -105,7 +96,7 @@ namespace boost { namespace locale { namespace impl_icu { template iter_type do_real_put(iter_type out, std::ios_base& ios, CharType fill, ValueType val) const { - if(detail::use_parent(ios, val)) + if(detail::use_parent(ios)) return std::num_put::do_put(out, ios, fill, val); const std::unique_ptr formatter = formatter_type::create(ios, loc_, enc_); @@ -240,7 +231,7 @@ namespace boost { namespace locale { namespace impl_icu { do_real_get(iter_type in, iter_type end, std::ios_base& ios, std::ios_base::iostate& err, ValueType& val) const { stream_type* stream_ptr = dynamic_cast(&ios); - if(!stream_ptr || detail::use_parent(ios, ValueType(0))) + if(!stream_ptr || detail::use_parent(ios)) return std::num_get::do_get(in, end, ios, err, val); const std::unique_ptr formatter = formatter_type::create(ios, loc_, enc_); diff --git a/src/boost/locale/icu/time_zone.cpp b/src/boost/locale/icu/time_zone.cpp index 2f11862a..b6dfa024 100644 --- a/src/boost/locale/icu/time_zone.cpp +++ b/src/boost/locale/icu/time_zone.cpp @@ -13,14 +13,13 @@ // Bug - when ICU tries to find a file that is equivalent to /etc/localtime it finds /usr/share/zoneinfo/localtime // that is just a symbolic link to /etc/localtime. // -// It started in 4.0 and was fixed in version 4.6, also the fix was backported to the 4.4 branch so it should be +// It was fixed in version 4.6, also the fix was backported to the 4.4 branch so it should be // available from 4.4.3... So we test if the workaround is required // // It is also relevant only for Linux, BSD and Apple (as I see in ICU code) // -#if BOOST_LOCALE_ICU_VERSION >= 400 && BOOST_LOCALE_ICU_VERSION <= 406 \ - && (BOOST_LOCALE_ICU_VERSION != 404 || U_ICU_VERSION_PATCHLEVEL_NUM >= 3) +#if BOOST_LOCALE_ICU_VERSION <= 406 && (BOOST_LOCALE_ICU_VERSION != 404 || U_ICU_VERSION_PATCHLEVEL_NUM >= 3) # if BOOST_OS_LINUX || BOOST_OS_BSD_FREE || defined(__APPLE__) # define BOOST_LOCALE_WORKAROUND_ICU_BUG # endif diff --git a/test/formatting_common.hpp b/test/formatting_common.hpp index 97ef7919..3fb9a6e0 100644 --- a/test/formatting_common.hpp +++ b/test/formatting_common.hpp @@ -10,7 +10,6 @@ #include #include -#include "../src/boost/locale/util/foreach_char.hpp" #include "boostLocale/test/tools.hpp" #include "boostLocale/test/unit_test.hpp" @@ -87,4 +86,40 @@ void test_parse_multi_number() BOOST_LOCALE_CALL(wchar_t); #undef BOOST_LOCALE_CALL #undef BOOST_LOCALE_CALL_I -} \ No newline at end of file +} + +template +void test_format_large_number_by_char(const std::locale& locale) +{ + std::basic_ostringstream output; + output.imbue(locale); + output << boost::locale::as::number; + + constexpr int64_t high_signed64 = 9223372036854775807; + static_assert(high_signed64 == std::numeric_limits::max()); + + empty_stream(output) << high_signed64; + TEST_EQ(output.str(), ascii_to("9,223,372,036,854,775,807")); + empty_stream(output) << static_cast(high_signed64); + TEST_EQ(output.str(), ascii_to("9,223,372,036,854,775,807")); + empty_stream(output) << (static_cast(high_signed64) + 1u); + TEST_EQ(output.str(), ascii_to("9,223,372,036,854,775,808")); + empty_stream(output) << (static_cast(high_signed64) + 579u); + TEST_EQ(output.str(), ascii_to("9,223,372,036,854,776,386")); +} + +void test_format_large_number() +{ + const auto locale = boost::locale::generator{}("en_US.UTF-8"); + + std::cout << "Testing char" << std::endl; + test_format_large_number_by_char(locale); + + std::cout << "Testing wchar_t" << std::endl; + test_format_large_number_by_char(locale); + +#ifdef BOOST_LOCALE_ENABLE_CHAR16_T + std::cout << "Testing char16_t" << std::endl; + test_format_large_number_by_char(locale); +#endif +} diff --git a/test/test_formatting.cpp b/test/test_formatting.cpp index 41c30a0f..b1359c8b 100644 --- a/test/test_formatting.cpp +++ b/test/test_formatting.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) -// Copyright (c) 2021-2022 Alexander Grund +// Copyright (c) 2021-2024 Alexander Grund // // Distributed under the Boost Software License, Version 1.0. // https://www.boost.org/LICENSE_1_0.txt @@ -49,16 +49,13 @@ std::string from_icu_string(const icu::UnicodeString& str) # define BOOST_LOCALE_ICU_VERSION_EXACT 0 #endif -// Currency style changes between ICU versions, so get "real" value from ICU -#if BOOST_LOCALE_ICU_VERSION >= 402 - std::string get_icu_currency_iso(const double value) { -# if BOOST_LOCALE_ICU_VERSION >= 408 +#if BOOST_LOCALE_ICU_VERSION >= 408 auto styleIso = UNUM_CURRENCY_ISO; -# else +#else auto styleIso = icu::NumberFormat::kIsoCurrencyStyle; -# endif +#endif UErrorCode err = U_ZERO_ERROR; std::unique_ptr fmt(icu::NumberFormat::createInstance(get_icu_test_locale(), styleIso, err)); TEST_REQUIRE(U_SUCCESS(err) && fmt.get()); @@ -67,8 +64,6 @@ std::string get_icu_currency_iso(const double value) return from_icu_string(fmt->format(value, tmp)); } -#endif - using format_style_t = std::ios_base&(std::ios_base&); #ifdef BOOST_LOCALE_WITH_ICU @@ -401,9 +396,10 @@ void test_manip(std::string e_charset = "UTF-8") TEST_PARSE_FAILS(as::number, "4,294,967,295", int32_t); TEST_MIN_MAX(int64_t, "-9,223,372,036,854,775,808", "9,223,372,036,854,775,807"); - // ICU does not support uint64, but we have a fallback to format it at least - TEST_MIN_MAX_FMT(as::number, uint64_t, "0", "18446744073709551615"); + TEST_MIN_MAX(uint64_t, "0", "18,446,744,073,709,551,615"); TEST_PARSE_FAILS(as::number, "-1", uint64_t); + TEST_PARSE_FAILS(as::number, "18,446,744,073,709,551,615", int64_t); + TEST_PARSE_FAILS(as::number, "18,446,744,073,709,551,616", uint64_t); TEST_FMT_PARSE_3(as::number, std::left, std::setw(3), 15, "15 "); TEST_FMT_PARSE_3(as::number, std::right, std::setw(3), 15, " 15"); @@ -424,12 +420,11 @@ void test_manip(std::string e_charset = "UTF-8") TEST_PARSE_FAILS(as::currency, "$", double); -#if BOOST_LOCALE_ICU_VERSION >= 402 TEST_FMT_PARSE_2(as::currency, as::currency_national, 1345, "$1,345.00"); TEST_FMT_PARSE_2(as::currency, as::currency_national, 1345.34, "$1,345.34"); TEST_FMT_PARSE_2(as::currency, as::currency_iso, 1345, get_icu_currency_iso(1345)); TEST_FMT_PARSE_2(as::currency, as::currency_iso, 1345.34, get_icu_currency_iso(1345.34)); -#endif + TEST_FMT_PARSE_1(as::spellout, 10, "ten"); #if 402 <= BOOST_LOCALE_ICU_VERSION && BOOST_LOCALE_ICU_VERSION < 408 if(e_charset == "UTF-8") @@ -502,7 +497,6 @@ void test_manip(std::string e_charset = "UTF-8") a_datetime, icu_time_long, a_time + a_timesec); -#if !(BOOST_LOCALE_ICU_VERSION == 308 && defined(__CYGWIN__)) // Known failure due to ICU issue TEST_PARSE(as::time >> as::time_full >> as::time_zone("GMT+01:00"), "4:33:13 PM GMT+01:00", a_time + a_timesec); TEST_FMT_PARSE_3_2(as::time, as::time_full, @@ -510,7 +504,6 @@ void test_manip(std::string e_charset = "UTF-8") a_datetime, icu_time_full, a_time + a_timesec); -#endif const std::string icu_def = get_ICU_datetime(as::time, a_datetime); const std::string icu_short = get_ICU_datetime(as::time_short, a_datetime); @@ -759,11 +752,7 @@ void test_format_class(std::string charset = "UTF-8") // format with locale & encoding { -#if BOOST_LOCALE_ICU_VERSION >= 400 const auto expected = boost::locale::conv::utf_to_utf("10,00\xC2\xA0€"); -#else - const auto expected = boost::locale::conv::utf_to_utf("10,00 €"); // LCOV_EXCL_LINE -#endif TEST_EQ(do_format(loc, "{1,cur,locale=de_DE.UTF-8}", 10), expected); } @@ -791,20 +780,14 @@ void test_format_class(std::string charset = "UTF-8") TEST_FORMAT_CLS("{1,cur}", 1234, "$1,234.00"); TEST_FORMAT_CLS("{1,currency}", 1234, "$1,234.00"); if(charset == "UTF-8") { -#if BOOST_LOCALE_ICU_VERSION >= 400 TEST_FORMAT_CLS("{1,cur,locale=de_DE}", 10, "10,00\xC2\xA0€"); -#else - TEST_FORMAT_CLS("{1,cur,locale=de_DE}", 10, "10,00 €"); // LCOV_EXCL_LINE -#endif } -#if BOOST_LOCALE_ICU_VERSION >= 402 TEST_FORMAT_CLS("{1,cur=nat}", 1234, "$1,234.00"); TEST_FORMAT_CLS("{1,cur=national}", 1234, "$1,234.00"); TEST_FORMAT_CLS("{1,cur=iso}", 1234, get_icu_currency_iso(1234)); -#endif TEST_FORMAT_CLS("{1,spell}", 10, "ten"); TEST_FORMAT_CLS("{1,spellout}", 10, "ten"); -#if 402 <= BOOST_LOCALE_ICU_VERSION && BOOST_LOCALE_ICU_VERSION < 408 +#if BOOST_LOCALE_ICU_VERSION < 408 if(charset == "UTF-8") { TEST_FORMAT_CLS("{1,ord}", 1, "1\xcb\xa2\xe1\xb5\x97"); TEST_FORMAT_CLS("{1,ordinal}", 1, "1\xcb\xa2\xe1\xb5\x97"); @@ -899,6 +882,56 @@ void test_format_class(std::string charset = "UTF-8") TEST_FORMAT_CLS("{1,gmt,ftime='%D'}", a_datetime, "12/31/13"); } +/// Test formatting and parsing of uint64_t values that are not natively supported by ICU. +/// They use a custom code path which gets exercised by this +void test_uint64_format() +{ +#ifdef BOOST_LOCALE_WITH_ICU + std::set tested_langs; + int32_t count; + auto* cur_locale = icu::Locale::getAvailableLocales(count); + constexpr uint64_t value = std::numeric_limits::max() + uint64_t(3); + const std::string posix_value = as_posix_string(value); + constexpr int32_t short_value = std::numeric_limits::max(); + const std::string posix_short_value = as_posix_string(short_value); + boost::locale::generator g; + const std::string utf8 = ".UTF-8"; + for(int i = 0; i < count; i++, cur_locale++) { + // Test each language exactly once + // if(!tested_langs.insert(cur_locale->getLanguage()).second) + // continue; + TEST_CONTEXT(cur_locale->getName()); + UErrorCode err{}; + auto* fmt = icu::NumberFormat::createInstance(*cur_locale, err); + icu::UnicodeString s; + fmt->format(short_value, s, nullptr, err); + if(U_FAILURE(err)) + continue; + const std::string icu_value = boost::locale::conv::utf_to_utf(s.getBuffer(), s.getBuffer() + s.length()); + std::stringstream ss; + if(cur_locale->getLanguage() == std::string("pa")) + ss.str(""); + ss.imbue(g(cur_locale->getName() + utf8)); + ss << boost::locale::as::number; + // Sanity check + ss << short_value; + TEST_EQ(ss.str(), icu_value); + + // Assumption: Either both the int32 and uint64 values are in POSIX format, or neither are + // This is the case if separators are used and/or numbers are not ASCII + empty_stream(ss) << value; + if(icu_value == posix_short_value) + TEST_EQ(ss.str(), posix_value); + else + TEST_NE(ss.str(), posix_value); + + uint64_t parsed_value{}; + TEST(ss >> parsed_value); + TEST_EQ(parsed_value, value); + } +#endif +} + BOOST_LOCALE_DISABLE_UNREACHABLE_CODE_WARNING void test_main(int argc, char** argv) { @@ -909,6 +942,8 @@ void test_main(int argc, char** argv) std::cout << "ICU is not build... Skipping\n"; return; #endif + test_uint64_format(); + boost::locale::time_zone::global("GMT+4:00"); std::cout << "Testing char, UTF-8" << std::endl; test_manip(); @@ -933,6 +968,7 @@ void test_main(int argc, char** argv) test_format_class(); #endif + test_format_large_number(); test_parse_multi_number(); } diff --git a/test/test_posix_formatting.cpp b/test/test_posix_formatting.cpp index 15c4d6d2..ce1267c4 100644 --- a/test/test_posix_formatting.cpp +++ b/test/test_posix_formatting.cpp @@ -186,6 +186,7 @@ void test_main(int /*argc*/, char** /*argv*/) TEST(v == "12345,45" || v == "12 345,45" || v == "12.345,45"); } } + test_format_large_number(); test_parse_multi_number(); } diff --git a/test/test_std_formatting.cpp b/test/test_std_formatting.cpp index 79156ad0..13ddd300 100644 --- a/test/test_std_formatting.cpp +++ b/test/test_std_formatting.cpp @@ -233,8 +233,10 @@ void test_main(int /*argc*/, char** /*argv*/) } // Std backend silently falls back to the C locale when the locale is not supported // which breaks the test assumptions - if(has_std_locale("en_US.UTF-8")) + if(has_std_locale("en_US.UTF-8")) { + test_format_large_number(); test_parse_multi_number(); + } } // boostinspect:noascii diff --git a/test/test_winapi_formatting.cpp b/test/test_winapi_formatting.cpp index 499e92a8..55d598fd 100644 --- a/test/test_winapi_formatting.cpp +++ b/test/test_winapi_formatting.cpp @@ -177,6 +177,7 @@ void test_main(int /*argc*/, char** /*argv*/) test_by_char(l, name, name_lcid.second); } } + test_format_large_number(); test_parse_multi_number(); std::cout << "- Testing strftime" << std::endl; test_date_time(gen("en_US.UTF-8"));