Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement support for uint64_t values in ICU backend #246

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright 2020, 2021 Peter Dimov
# Copyright 2023-2024 Alexander Grund
# Distributed under the Boost Software License, Version 1.0.
# https://www.boost.org/LICENSE_1_0.txt

Expand Down Expand Up @@ -51,6 +52,7 @@ target_link_libraries(boost_locale
Boost::iterator
Boost::utility
PRIVATE
Boost::charconv
Boost::predef
Boost::thread
)
Expand Down
3 changes: 2 additions & 1 deletion build/Jamfile.v2
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Copyright 2003 John Maddock
# Copyright 2010 Artyom Beilis
# Copyright 2021 - 2022 Alexander Grund
# Copyright 2021 - 2024 Alexander Grund
#
# Distributed under the Boost Software License, Version 1.0.
# https://www.boost.org/LICENSE_1_0.txt.
Expand All @@ -16,6 +16,7 @@ import toolset ;
path-constant TOP : .. ;

# Boost library targets grouped under the name boost_dependencies_private.
constant boost_dependencies_private :
/boost/charconv//boost_charconv
/boost/predef//boost_predef
/boost/thread//boost_thread
;
Expand Down
1 change: 1 addition & 0 deletions doc/changelog.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

- 1.88.0
- Fix parsing of numbers in floating point format to integers
- Require ICU 4.2 or later
- 1.86.0
- Make ICU implementation of `to_title` threadsafe
- Add allocator support to `utf_to_utf`
Expand Down
12 changes: 2 additions & 10 deletions src/boost/locale/icu/collator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,8 @@
#include <limits>
#include <memory>
#include <unicode/coll.h>
#include <unicode/stringpiece.h>
#include <vector>
#if BOOST_LOCALE_ICU_VERSION >= 402
# define BOOST_LOCALE_WITH_STRINGPIECE 1
# include <unicode/stringpiece.h>
#else
# define BOOST_LOCALE_WITH_STRINGPIECE 0
#endif

#ifdef BOOST_MSVC
# pragma warning(disable : 4244) // 'argument' : conversion from 'int'
Expand All @@ -43,7 +38,6 @@ namespace boost { namespace locale { namespace impl_icu {
return res;
}

#if BOOST_LOCALE_WITH_STRINGPIECE
int do_utf8_compare(collate_level level,
const char* b1,
const char* e1,
Expand All @@ -55,7 +49,6 @@ namespace boost { namespace locale { namespace impl_icu {
icu::StringPiece right(b2, e2 - b2);
return get_collator(level).compareUTF8(left, right, status);
}
#endif

int do_ustring_compare(collate_level level,
const CharType* b1,
Expand Down Expand Up @@ -159,7 +152,6 @@ namespace boost { namespace locale { namespace impl_icu {
bool is_utf8_;
};

#if BOOST_LOCALE_WITH_STRINGPIECE
template<>
int collate_impl<char>::do_real_compare(collate_level level,
const char* b1,
Expand All @@ -173,7 +165,7 @@ namespace boost { namespace locale { namespace impl_icu {
else
return do_ustring_compare(level, b1, e1, b2, e2, status);
}
#endif

std::locale create_collate(const std::locale& in, const cdata& cd, char_facet_t type)
{
switch(type) {
Expand Down
17 changes: 2 additions & 15 deletions src/boost/locale/icu/conversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,8 @@
#include <limits>
#include <unicode/locid.h>
#include <unicode/normlzr.h>
#include <unicode/ucasemap.h>
#include <unicode/ustring.h>
#if BOOST_LOCALE_ICU_VERSION >= 308
# include <unicode/ucasemap.h>
# define BOOST_LOCALE_WITH_CASEMAP
#endif
#include <vector>

namespace boost { namespace locale { namespace impl_icu {
Expand Down Expand Up @@ -72,7 +69,6 @@ namespace boost { namespace locale { namespace impl_icu {
std::string encoding_;
}; // converter_impl

#ifdef BOOST_LOCALE_WITH_CASEMAP
template<typename T>
struct get_casemap_size_type;

Expand Down Expand Up @@ -193,26 +189,17 @@ namespace boost { namespace locale { namespace impl_icu {
raii_casemap<U8Char> map_;
}; // converter_impl

#endif // BOOST_LOCALE_WITH_CASEMAP

std::locale create_convert(const std::locale& in, const cdata& cd, char_facet_t type)
{
switch(type) {
case char_facet_t::nochar: break;
case char_facet_t::char_f:
#ifdef BOOST_LOCALE_WITH_CASEMAP
if(cd.is_utf8())
return std::locale(in, new utf8_converter_impl<char>(cd));
#endif
return std::locale(in, new converter_impl<char>(cd));
case char_facet_t::wchar_f: return std::locale(in, new converter_impl<wchar_t>(cd));
#ifndef BOOST_LOCALE_NO_CXX20_STRING8
case char_facet_t::char8_f:
# if defined(BOOST_LOCALE_WITH_CASEMAP)
return std::locale(in, new utf8_converter_impl<char8_t>(cd));
# else
return std::locale(in, new converter_impl<char8_t>(cd));
# endif
case char_facet_t::char8_f: return std::locale(in, new utf8_converter_impl<char8_t>(cd));
#elif defined(__cpp_char8_t)
case char_facet_t::char8_f: break;
#endif
Expand Down
19 changes: 3 additions & 16 deletions src/boost/locale/icu/date_time.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,6 @@ namespace boost { namespace locale { namespace impl_icu {
const double rounded_time = std::floor(calendar_->getTime(err) / U_MILLIS_PER_SECOND) * U_MILLIS_PER_SECOND;
calendar_->setTime(rounded_time, err);
check_and_throw_dt(err);
#if BOOST_LOCALE_ICU_VERSION < 402
// workaround old/invalid data, it should be 4 in general
calendar_->setMinimalDaysInFirstWeek(4);
#endif
encoding_ = dat.encoding();
}
calendar_impl(const calendar_impl& other)
Expand All @@ -79,15 +75,9 @@ namespace boost { namespace locale { namespace impl_icu {
encoding_ = other.encoding_;
}

/// Create a new calendar_impl as a copy of this object (via the copy constructor)
/// and return it as a raw pointer, as dictated by the overridden signature.
calendar_impl* clone() const override { return new calendar_impl(*this); }

/// Set the ICU calendar field that corresponds to @c p to @c value.
/// The value is converted to int32_t before it is handed to the calendar.
void set_value(period::marks::period_mark p, int value) override
{
    calendar_->set(to_icu(p), static_cast<int32_t>(value));
}

int get_value(period::marks::period_mark p, value_type type) const override
{
Expand Down Expand Up @@ -202,10 +192,7 @@ namespace boost { namespace locale { namespace impl_icu {
check_and_throw_dt(err);
return diff;
}
/// Replace the calendar's time zone with the one returned by get_time_zone(tz).
void set_timezone(const std::string& tz) override { calendar_->adoptTimeZone(get_time_zone(tz)); }
std::string get_timezone() const override
{
icu::UnicodeString tz;
Expand Down
65 changes: 50 additions & 15 deletions src/boost/locale/icu/formatter.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//
// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
// Copyright (c) 2021-2023 Alexander Grund
// Copyright (c) 2021-2024 Alexander Grund
//
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt
Expand All @@ -13,8 +13,12 @@
#include "boost/locale/icu/time_zone.hpp"
#include "boost/locale/icu/uconv.hpp"
#include "boost/locale/util/foreach_char.hpp"
#include <boost/assert.hpp>
#include <boost/charconv/from_chars.hpp>
#include <boost/charconv/to_chars.hpp>
#include <limits>
#include <memory>
#include <sstream>
#ifdef BOOST_MSVC
# pragma warning(push)
# pragma warning(disable : 4251) // "identifier" : class "type" needs to have dll-interface...
Expand Down Expand Up @@ -62,35 +66,69 @@ namespace boost { namespace locale { namespace impl_icu {
// Signed integer formatting forwards to do_format
string_type format(int64_t value, size_t& code_points) const override
{
    return do_format(value, code_points);
}
string_type format(int32_t value, size_t& code_points) const override
{
    return do_format(value, code_points);
}

// All parse overloads forward to do_parse
size_t parse(const string_type& str, double& value) const override
{
    return do_parse(str, value);
}
size_t parse(const string_type& str, uint64_t& value) const override
{
    return do_parse(str, value);
}
size_t parse(const string_type& str, int64_t& value) const override
{
    return do_parse(str, value);
}
size_t parse(const string_type& str, int32_t& value) const override
{
    return do_parse(str, value);
}

/// Format an unsigned 64 bit value; the number of Unicode code points of the
/// result is stored in @c code_points.
string_type format(const uint64_t value, size_t& code_points) const override
{
    // ICU has no integer type wider than int64_t, so only values above that range
    // need the decimal-string route below.
    const uint64_t max_signed = static_cast<uint64_t>(std::numeric_limits<int64_t>::max());
    if(value > max_signed) {
        // Room for every decimal digit of a uint64_t plus the terminator
        char digits[std::numeric_limits<uint64_t>::digits10 + 2];
        auto conv = boost::charconv::to_chars(digits, std::end(digits), value);
        BOOST_ASSERT(conv);
        // ICU expects a NUL-terminated string even for the StringPiece
        *conv.ptr = '\0';
        UErrorCode status = U_ZERO_ERROR;
        icu::UnicodeString formatted;
        icu_fmt_.format(icu::StringPiece(digits, conv.ptr - digits), formatted, nullptr, status);
        check_and_throw_icu_error(status);
        code_points = formatted.countChar32();
        return cvt_.std(formatted);
    }
    // Fits into int64_t: reuse the signed overload
    return format(static_cast<int64_t>(value), code_points);
}

private:
/// Read the parsed number from @c fmt as a double and store it in @c v.
/// @c v is assigned whatever getDouble returns, even when ICU reports an error.
/// @return true if ICU reported success, false otherwise
bool get_value(double& v, icu::Formattable& fmt) const
{
    UErrorCode err = U_ZERO_ERROR;
    v = fmt.getDouble(err);
    return U_SUCCESS(err);
}

bool get_value(int64_t& v, icu::Formattable& fmt) const
{
UErrorCode err = U_ZERO_ERROR;
v = fmt.getInt64(err);
return U_SUCCESS(err);
}

/// Read the parsed number from @c fmt as an unsigned 64 bit integer into @c v.
/// @return true if the value could be read as a non-negative integer, false otherwise
bool get_value(uint64_t& v, icu::Formattable& fmt) const
{
    UErrorCode err = U_ZERO_ERROR;
    // ICU only supports int64_t as the largest integer type
    const int64_t tmp = fmt.getInt64(err);
    if(U_SUCCESS(err)) {
        if(tmp < 0)
            return false; // Negative values cannot be stored in the unsigned target
        v = static_cast<uint64_t>(tmp);
        return true;
    }
    // getInt64 failed: get the value as a decimal number string and parse that instead
    err = U_ZERO_ERROR;
    const auto decimals = fmt.getDecimalNumber(err);
    if(U_FAILURE(err))
        return false; // Not a number
    // NOTE(review): only the from_chars result code is checked, not how much of the string
    // was consumed; presumably the decimal string consists of plain digits here -- confirm
    // that ICU cannot hand back a fraction or exponent notation for such values.
    const auto res = boost::charconv::from_chars({decimals.data(), static_cast<size_t>(decimals.length())}, v);
    return static_cast<bool>(res);
}

/// Read the parsed number from @c fmt as a signed 32 bit integer and store it in @c v.
/// @c v is assigned whatever getLong returns, even when ICU reports an error.
/// @return true if ICU reported success, false otherwise
bool get_value(int32_t& v, icu::Formattable& fmt) const
{
    UErrorCode err = U_ZERO_ERROR;
    v = fmt.getLong(err);
    return U_SUCCESS(err);
}

template<typename ValueType>
Expand All @@ -114,14 +152,11 @@ namespace boost { namespace locale { namespace impl_icu {
icu_fmt_.setParseIntegerOnly(std::is_integral<ValueType>::value && isNumberOnly_);
icu_fmt_.parse(tmp, val, pp);

ValueType tmp_v;

if(pp.getIndex() == 0 || !get_value(tmp_v, val))
if(pp.getIndex() == 0 || !get_value(v, val))
return 0;
size_t cut = cvt_.cut(tmp, str.data(), str.data() + str.size(), pp.getIndex());
if(cut == 0)
return 0;
v = tmp_v;
return cut;
}

Expand All @@ -136,11 +171,11 @@ namespace boost { namespace locale { namespace impl_icu {
typedef std::basic_string<CharType> string_type;

// Every format overload forwards to do_format
string_type format(double value, size_t& code_points) const override
{
    return do_format(value, code_points);
}
string_type format(uint64_t value, size_t& code_points) const override
{
    return do_format(value, code_points);
}
string_type format(int64_t value, size_t& code_points) const override
{
    return do_format(value, code_points);
}
string_type format(int32_t value, size_t& code_points) const override
{
    return do_format(value, code_points);
}

// Every parse overload forwards to do_parse
size_t parse(const string_type& str, double& value) const override
{
    return do_parse(str, value);
}
size_t parse(const string_type& str, uint64_t& value) const override
{
    return do_parse(str, value);
}
size_t parse(const string_type& str, int64_t& value) const override
{
    return do_parse(str, value);
}
size_t parse(const string_type& str, int32_t& value) const override
{
    return do_parse(str, value);
}

Expand Down
6 changes: 6 additions & 0 deletions src/boost/locale/icu/formatter.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//
// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
// Copyright (c) 2024 Alexander Grund
//
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt
Expand Down Expand Up @@ -31,6 +32,8 @@ namespace boost { namespace locale { namespace impl_icu {
/// Format the value and return the resulting string.
/// The number of Unicode code points is reported through @c code_points.
virtual string_type format(double value, size_t& code_points) const = 0;
/// Format the value and return the resulting string.
/// The number of Unicode code points is reported through @c code_points.
virtual string_type format(uint64_t value, size_t& code_points) const = 0;
/// Format the value and return the resulting string.
/// The number of Unicode code points is reported through @c code_points.
virtual string_type format(int64_t value, size_t& code_points) const = 0;
/// Format the value and return the resulting string.
/// The number of Unicode code points is reported through @c code_points.
virtual string_type format(int32_t value, size_t& code_points) const = 0;
Expand All @@ -40,6 +43,9 @@ namespace boost { namespace locale { namespace impl_icu {
virtual size_t parse(const string_type& str, double& value) const = 0;
/// Parse the string and return the number of used characters. If it returns 0
/// then parsing failed.
virtual size_t parse(const string_type& str, uint64_t& value) const = 0;
/// Parse the string and return the number of used characters. If it returns 0
/// then parsing failed.
virtual size_t parse(const string_type& str, int64_t& value) const = 0;
/// Parse the string and return the number of used characters. If it returns 0
/// then parsing failed.
Expand Down
5 changes: 1 addition & 4 deletions src/boost/locale/icu/formatters_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,16 +92,13 @@ namespace boost { namespace locale { namespace impl_icu {
case num_fmt_type::curr_iso:
return icu::NumberFormat::createInstance(locale_, UNUM_CURRENCY_ISO, err);
break;
#elif BOOST_LOCALE_ICU_VERSION >= 402
#else
case num_fmt_type::curr_nat:
return icu::NumberFormat::createInstance(locale_, icu::NumberFormat::kCurrencyStyle, err);
break;
case num_fmt_type::curr_iso:
return icu::NumberFormat::createInstance(locale_, icu::NumberFormat::kIsoCurrencyStyle, err);
break;
#else
case num_fmt_type::curr_nat:
case num_fmt_type::curr_iso: return icu::NumberFormat::createCurrencyInstance(locale_, err); break;
#endif
case num_fmt_type::percent: return icu::NumberFormat::createPercentInstance(locale_, err); break;
case num_fmt_type::spell: return new icu::RuleBasedNumberFormat(icu::URBNF_SPELLOUT, locale_, err); break;
Expand Down
Loading
Loading