Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
Flamefire committed Jul 6, 2024
2 parents 7fc249e + ea81c22 commit 52cf8f8
Show file tree
Hide file tree
Showing 6 changed files with 270 additions and 29 deletions.
30 changes: 15 additions & 15 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Copyright 2020-2021 Peter Dimov
# Copyright 2021 Andrey Semashev
# Copyright 2021 Alexander Grund
# Copyright 2021-2024 Alexander Grund
# Copyright 2022 James E. King III
#
# Distributed under the Boost Software License, Version 1.0.
Expand Down Expand Up @@ -126,14 +126,16 @@ jobs:
compiler: clang-12, cxxstd: '11,14,17,20', os: ubuntu-20.04, stdlib: libc++, install: 'clang-12 libc++-12-dev libc++abi-12-dev' }

# OSX, clang
- { compiler: clang, cxxstd: '11,14,17,2a', os: macos-11, ccache: no }
- { compiler: clang, cxxstd: '11,14,17,20', os: macos-12 }
- { name: MacOS w/ clang and sanitizers,
compiler: clang, cxxstd: '11,14,17,20,2b', os: macos-13, ubsan: yes }
# TODO: Iconv issue
#- { compiler: clang, cxxstd: '11,14,17,20,2b', os: macos-14 }

timeout-minutes: 120
runs-on: ${{matrix.os}}
container: ${{matrix.container}}
env: {B2_USE_CCACHE: 1}
env: {B2_USE_CCACHE: 1, ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true}

steps:
- name: Setup environment
Expand Down Expand Up @@ -188,21 +190,19 @@ jobs:
run: |
SOURCE_KEYS=(${{join(matrix.source_keys, ' ')}})
SOURCES=(${{join(matrix.sources, ' ')}})
# Add this by default
# Add these by default
SOURCE_KEYS+=(
'http://keyserver.ubuntu.com/pks/lookup?op=get&search=0x1E9377A2BA9EF27F'
'http://keyserver.ubuntu.com/pks/lookup?op=get&search=0x40976EAF437D05B5'
'http://keyserver.ubuntu.com/pks/lookup?op=get&search=0x3B4FE6ACC0B21F32'
)
SOURCES+=(ppa:ubuntu-toolchain-r/test)
for key in "${SOURCE_KEYS[@]}"; do
for i in {1..$NET_RETRY_COUNT}; do
keyfilename=$(basename -s .key $key)
curl -sSL --retry ${NET_RETRY_COUNT:-5} "$key" | sudo gpg --dearmor > /etc/apt/trusted.gpg.d/${keyfilename} && break || sleep 10
done
done
ci/add-apt-keys.sh "${SOURCE_KEYS[@]}"
# Initial update before adding sources required to get e.g. keys
sudo apt-get -o Acquire::Retries=$NET_RETRY_COUNT update
for source in "${SOURCES[@]}"; do
for i in {1..$NET_RETRY_COUNT}; do
sudo add-apt-repository $source && break || sleep 10
done
done
ci/add-apt-repositories.sh "${SOURCES[@]}"
sudo apt-get -o Acquire::Retries=$NET_RETRY_COUNT update
if [[ -z "${{matrix.install}}" ]]; then
pkgs="${{matrix.compiler}}"
Expand Down
3 changes: 3 additions & 0 deletions doc/changelog.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
/*!
\page changelog Changelog

- 1.86.0
- Make ICU implementation of `to_title` threadsafe
- Add allocator support to `utf_to_utf`
- 1.85.0
- Breaking changes
- `collator` no longer derives from `std::collator`, avoiding possible type confusion
Expand Down
30 changes: 30 additions & 0 deletions include/boost/locale/detail/allocator_traits.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
//
// Copyright (c) 2024 Alexander Grund
//
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt

#ifndef BOOST_LOCALE_DETAIL_ALLOCATOR_TRAITS_HPP_INCLUDED
#define BOOST_LOCALE_DETAIL_ALLOCATOR_TRAITS_HPP_INCLUDED

#include <boost/locale/config.hpp>
#include <memory>
#include <type_traits>

/// \cond INTERNAL
namespace boost { namespace locale { namespace conv { namespace detail {
    /// Allocator type obtained by rebinding \a Alloc to elements of type \a T
    template<class Alloc, typename T>
    using rebind_alloc = typename std::allocator_traits<Alloc>::template rebind_alloc<T>;

    /// Yields \a Result only if \a Alloc declares a nested `value_type` that is exactly \a T.
    /// Otherwise the alias is ill-formed, so it can be used in a signature to remove an overload
    /// from consideration (SFINAE).
    template<class Alloc, typename T, typename Result = void>
    using enable_if_allocator_for =
      typename std::enable_if<std::is_same<T, typename Alloc::value_type>::value, Result>::type;

    /// Like enable_if_allocator_for, but requires that \a Alloc is an allocator for \a T
    /// and that \a Alloc2 is an allocator for \a T2
    template<class Alloc, typename T, class Alloc2, typename T2, typename Result = void>
    using enable_if_allocator_for2 =
      enable_if_allocator_for<Alloc, T, enable_if_allocator_for<Alloc2, T2, Result>>;
}}}} // namespace boost::locale::conv::detail

/// \endcond

#endif
104 changes: 92 additions & 12 deletions include/boost/locale/encoding_utf.hpp
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
//
// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
// Copyright (c) 2022-2024 Alexander Grund
//
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt

#ifndef BOOST_LOCALE_ENCODING_UTF_HPP_INCLUDED
#define BOOST_LOCALE_ENCODING_UTF_HPP_INCLUDED

#include <boost/locale/detail/allocator_traits.hpp>
#include <boost/locale/encoding_errors.hpp>
#include <boost/locale/utf.hpp>
#include <boost/locale/util/string.hpp>
#include <iterator>
#include <memory>
#include <type_traits>

#ifdef BOOST_MSVC
# pragma warning(push)
Expand All @@ -25,12 +29,13 @@ namespace boost { namespace locale { namespace conv {
/// Convert a Unicode text in range [begin,end) to other Unicode encoding
///
/// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be decoded)
template<typename CharOut, typename CharIn>
std::basic_string<CharOut> utf_to_utf(const CharIn* begin, const CharIn* end, method_type how = default_method)
template<typename CharOut, typename CharIn, class Alloc = std::allocator<CharOut>>
std::basic_string<CharOut, std::char_traits<CharOut>, Alloc>
utf_to_utf(const CharIn* begin, const CharIn* end, method_type how = default_method, const Alloc& alloc = Alloc())
{
std::basic_string<CharOut> result;
std::basic_string<CharOut, std::char_traits<CharOut>, Alloc> result(alloc);
result.reserve(end - begin);
std::back_insert_iterator<std::basic_string<CharOut>> inserter(result);
auto inserter = std::back_inserter(result);
while(begin != end) {
const utf::code_point c = utf::utf_traits<CharIn>::decode(begin, end);
if(c == utf::illegal || c == utf::incomplete) {
Expand All @@ -42,22 +47,97 @@ namespace boost { namespace locale { namespace conv {
return result;
}

/// Convert a Unicode NULL terminated string \a str other Unicode encoding
/// Convert a Unicode string \a str to other Unicode encoding.
/// Invalid characters are skipped.
template<typename CharOut, typename CharIn, class Alloc>
std::basic_string<CharOut, std::char_traits<CharOut>, Alloc>
utf_to_utf(const CharIn* begin, const CharIn* end, const Alloc& alloc)
{
return utf_to_utf<CharOut>(begin, end, skip, alloc);
}

/// Convert the NULL terminated Unicode string \a str to other Unicode encoding
/// using \a alloc as the allocator of the returned string
///
/// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be decoded)
template<typename CharOut, typename CharIn, class Alloc = std::allocator<CharOut>>
std::basic_string<CharOut, std::char_traits<CharOut>, Alloc>
utf_to_utf(const CharIn* str, method_type how = default_method, const Alloc& alloc = Alloc())
{
    // Determine the end of the string and forward to the range based overload
    const CharIn* const strEnd = util::str_end(str);
    return utf_to_utf<CharOut>(str, strEnd, how, alloc);
}

/// Convert the NULL terminated Unicode string \a str to other Unicode encoding
/// using \a alloc as the allocator of the returned string.
/// Invalid characters are skipped.
template<typename CharOut, typename CharIn, class Alloc>
#ifndef BOOST_LOCALE_DOXYGEN
// Only participate in overload resolution if Alloc is an allocator for CharOut
detail::enable_if_allocator_for<Alloc,
                                CharOut,
#endif
                                std::basic_string<CharOut, std::char_traits<CharOut>, Alloc>
#ifndef BOOST_LOCALE_DOXYGEN
                                >
#endif
utf_to_utf(const CharIn* str, const Alloc& alloc)
{
    // No method can be passed to this overload, so the conversion always uses `skip`
    const method_type how = skip;
    return utf_to_utf<CharOut>(str, how, alloc);
}

/// Convert a Unicode string \a str to other Unicode encoding
///
/// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be decoded)
template<typename CharOut, typename CharIn>
std::basic_string<CharOut> utf_to_utf(const CharIn* str, method_type how = default_method)
template<typename CharOut, typename CharIn, class Alloc>
#ifndef BOOST_LOCALE_DOXYGEN
detail::enable_if_allocator_for<
Alloc,
CharIn,
#endif
std::basic_string<CharOut, std::char_traits<CharOut>, detail::rebind_alloc<Alloc, CharOut>>
#ifndef BOOST_LOCALE_DOXYGEN
>
#endif
utf_to_utf(const std::basic_string<CharIn, std::char_traits<CharIn>, Alloc>& str, method_type how = default_method)
{
return utf_to_utf<CharOut, CharIn>(str, util::str_end(str), how);
return utf_to_utf<CharOut>(str.c_str(),
str.c_str() + str.size(),
how,
detail::rebind_alloc<Alloc, CharOut>(str.get_allocator()));
}

/// Convert a Unicode string \a str other Unicode encoding
/// Convert a Unicode string \a str to other Unicode encoding
///
/// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be decoded)
template<typename CharOut, typename CharIn>
std::basic_string<CharOut> utf_to_utf(const std::basic_string<CharIn>& str, method_type how = default_method)
template<typename CharOut, typename CharIn, class AllocOut, class AllocIn>
#ifndef BOOST_LOCALE_DOXYGEN
detail::enable_if_allocator_for<AllocIn,
CharIn,
#endif
std::basic_string<CharOut, std::char_traits<CharOut>, AllocOut>
#ifndef BOOST_LOCALE_DOXYGEN
>
#endif
utf_to_utf(const std::basic_string<CharIn, std::char_traits<CharIn>, AllocIn>& str,
method_type how = default_method,
const AllocOut& alloc = AllocOut())
{
return utf_to_utf<CharOut>(str.c_str(), str.c_str() + str.size(), how, alloc);
}

/// Convert a Unicode string \a str to other Unicode encoding.
/// Invalid characters are skipped.
template<typename CharOut, typename CharIn, class AllocOut, class AllocIn>
#ifndef BOOST_LOCALE_DOXYGEN
detail::enable_if_allocator_for2<AllocIn,
CharIn,
AllocOut,
CharOut,
#endif
std::basic_string<CharOut, std::char_traits<CharOut>, AllocOut>
#ifndef BOOST_LOCALE_DOXYGEN
>
#endif
utf_to_utf(const std::basic_string<CharIn, std::char_traits<CharIn>, AllocIn>& str, const AllocOut& alloc)
{
return utf_to_utf<CharOut, CharIn>(str.c_str(), str.c_str() + str.size(), how);
return utf_to_utf<CharOut>(str, skip, alloc);
}

/// @}
Expand Down
4 changes: 2 additions & 2 deletions test/boostLocale/test/unit_test.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ void stream_char(std::ostream& s, const Char c)
<< static_cast<unsigned>(c);
}

template<typename Char>
std::string to_string(const std::basic_string<Char>& s)
template<typename Char, class Alloc>
std::string to_string(const std::basic_string<Char, std::char_traits<Char>, Alloc>& s)
{
std::stringstream ss;
for(const Char c : s)
Expand Down
128 changes: 128 additions & 0 deletions test/test_encoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,133 @@ void test_utf_to_utf()
#endif
}

// NOTE(review): not referenced in the visible part of this file -- confirm whether it is still required
int globalUsedId = 0;

/// Allocator that reports when it has been used in a static variable:
/// Each allocation of `char` or `wchar_t` storage adds the `id` of the allocating instance
/// to the counter referenced by `usedId`
template<typename T>
struct CustomAllocator {
    using value_type = T;
    using size_type = std::size_t;
    using difference_type = std::ptrdiff_t;
    using pointer = T*;
    using const_pointer = const T*;
    using reference = T&;
    using const_reference = const T&;
    using is_always_equal = std::false_type;
    using propagate_on_container_move_assignment = std::true_type;

    template<typename U>
    struct rebind {
        using other = CustomAllocator<U>;
    };

    /// Create an allocator identified by the given id
    CustomAllocator(const int allocId = 1) : id(allocId) {}
    /// Create an allocator from one for a different value type, keeping its id
    template<typename U>
    CustomAllocator(const CustomAllocator<U>& other) : id(other.id)
    {}

    /// Allocate storage for \a n elements and record the usage of this allocator
    T* allocate(size_t n)
    {
        // Only count allocations of (w)chars, not e.g. internal proxy instances
        const bool isCountedType = std::is_same<T, char>::value || std::is_same<T, wchar_t>::value;
        BOOST_LOCALE_START_CONST_CONDITION
        if(isCountedType)
            usedId += id;
        BOOST_LOCALE_END_CONST_CONDITION
        return base.allocate(n);
    }

    /// Release storage obtained from allocate, this is not recorded
    void deallocate(T* p, size_t n)
    {
        base.deallocate(p, n);
    }

    /// Counter to which the ids of the used allocators are added
    static int& usedId;
    /// Identifier of this instance
    int id;

private:
    /// Performs the actual (de)allocations
    std::allocator<T> base;
};
/// All instances compare equal, the id is not taken into account
template<class T, class U>
bool operator==(const CustomAllocator<T>& /*lhs*/, const CustomAllocator<U>& /*rhs*/)
{
    return true;
}

/// Negation of the equality comparison, i.e. instances never compare unequal
template<class T, class U>
bool operator!=(const CustomAllocator<T>& lhs, const CustomAllocator<U>& rhs)
{
    return !(lhs == rhs);
}

namespace detail {
    // Counter shared by all CustomAllocator specializations.
    // Note that storing the value in a static class variable does not work due to possible rebinds:
    // Each specialization has its own static members, so a rebound allocator would use a different counter
    int allocUsedId{0};
} // namespace detail
/// For every specialization the counter refers to the single shared variable
template<typename T>
int& CustomAllocator<T>::usedId = detail::allocUsedId;

/// Check that the utf_to_utf overloads allocate their result with the expected allocator.
/// Each check resets the usage counter, runs one conversion and compares the counter with the id
/// of the allocator that is expected to be used. CustomAllocator::allocate adds the id of the
/// allocating instance to the counter, so a match is what a single counted allocation
/// by that allocator produces.
void test_utf_to_utf_allocator_support()
{
using Alloc = CustomAllocator<wchar_t>;
using AllocIn = CustomAllocator<char>;
using boost::locale::conv::utf_to_utf;
const auto method = boost::locale::conv::default_method;
const std::string input(65, '0'); // Long enough to avoid SBO
// Non-default id to be able to detect that the allocator was taken from the input string
const AllocIn inputAllocator(17);
const std::basic_string<char, std::char_traits<char>, AllocIn> inputWithAlloc(input.begin(),
input.end(),
inputAllocator);
// Expected result, created before the first reset of the counter so its allocation does not affect the checks
const std::basic_string<wchar_t, std::char_traits<wchar_t>, Alloc> output(input.begin(), input.end());
const char* sBegin = input.data();
const char* sEnd = sBegin + input.size();

// Allocator via template param
// The allocator is default constructed and hence has the id 1
Alloc::usedId = 0;
TEST_EQ((utf_to_utf<wchar_t, char, Alloc>(sBegin, sEnd)), output);
TEST_EQ(Alloc::usedId, 1);
Alloc::usedId = 0;
TEST_EQ((utf_to_utf<wchar_t, char, Alloc>(sBegin, method)), output);
TEST_EQ(Alloc::usedId, 1);
Alloc::usedId = 0;
TEST_EQ((utf_to_utf<wchar_t, char, Alloc>(inputWithAlloc)), output);
TEST_EQ(Alloc::usedId, 1);
Alloc::usedId = 0;
TEST_EQ((utf_to_utf<wchar_t, char, Alloc>(inputWithAlloc, method)), output);
TEST_EQ(Alloc::usedId, 1);

// Pass allocator explicitly
// Each overload gets an allocator with a different id which must show up in the counter
Alloc::usedId = 0;
TEST_EQ(utf_to_utf<wchar_t>(sBegin, sEnd, method, Alloc(2)), output);
TEST_EQ(Alloc::usedId, 2);
Alloc::usedId = 0;
TEST_EQ(utf_to_utf<wchar_t>(sBegin, method, Alloc(3)), output);
TEST_EQ(Alloc::usedId, 3);
Alloc::usedId = 0;
TEST_EQ(utf_to_utf<wchar_t>(inputWithAlloc, method, Alloc(4)), output);
TEST_EQ(Alloc::usedId, 4);
// Same without passing a method, i.e. using the overloads taking only the allocator
Alloc::usedId = 0;
TEST_EQ(utf_to_utf<wchar_t>(sBegin, sEnd, Alloc(2)), output);
TEST_EQ(Alloc::usedId, 2);
Alloc::usedId = 0;
TEST_EQ(utf_to_utf<wchar_t>(sBegin, Alloc(3)), output);
TEST_EQ(Alloc::usedId, 3);
Alloc::usedId = 0;
TEST_EQ(utf_to_utf<wchar_t>(inputWithAlloc, Alloc(4)), output);
TEST_EQ(Alloc::usedId, 4);

// Use allocator from input
// Alloc::usedId and AllocIn::usedId refer to the same counter and the rebound allocator keeps
// the id of the input allocator, so that id is expected although the output uses wchar_t
Alloc::usedId = 0;
TEST_EQ(utf_to_utf<wchar_t>(inputWithAlloc), output);
TEST_EQ(Alloc::usedId, inputAllocator.id);
Alloc::usedId = 0;
TEST_EQ(utf_to_utf<wchar_t>(inputWithAlloc, method), output);
TEST_EQ(Alloc::usedId, inputAllocator.id);

// Unchanged allocator for string overloads to check for ambiguous overloads
AllocIn::usedId = 0;
TEST_EQ(utf_to_utf<char>(inputWithAlloc, method, AllocIn(4)), inputWithAlloc);
TEST_EQ(AllocIn::usedId, 4);
// The counter is not reset here, so the id of the allocator used by the second conversion is added on top
TEST_EQ(utf_to_utf<char>(inputWithAlloc), inputWithAlloc);
TEST_EQ(AllocIn::usedId, 4 + inputAllocator.id);
}

/// Test all overloads of to_utf/from_utf templated by Char
template<typename Char>
void test_latin1_conversions_for()
Expand Down Expand Up @@ -647,6 +774,7 @@ void test_main(int /*argc*/, char** /*argv*/)

test_latin1_conversions();
test_utf_to_utf();
test_utf_to_utf_allocator_support();

std::cout << "Testing charset to/from UTF conversion functions\n";
std::cout << " char" << std::endl;
Expand Down

0 comments on commit 52cf8f8

Please sign in to comment.