From 87af86625d32ec3f056bfc5b2f26cd5b4a1e5a6d Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Tue, 19 Mar 2024 11:16:30 +0100 Subject: [PATCH 01/14] Implemented variable_size_binary_layout --- CMakeLists.txt | 1 + .../sparrow/variable_size_binary_layout.hpp | 93 +++++++++++++++++++ test/CMakeLists.txt | 1 + test/test_variable_size_binary_layout.cpp | 83 +++++++++++++++++ 4 files changed, 178 insertions(+) create mode 100644 include/sparrow/variable_size_binary_layout.hpp create mode 100644 test/test_variable_size_binary_layout.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 5e66f443..e89d6110 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,6 +57,7 @@ set(SPARROW_HEADERS ${SPARROW_INCLUDE_DIR}/sparrow/iterator.hpp ${SPARROW_INCLUDE_DIR}/sparrow/mp_utils.hpp ${SPARROW_INCLUDE_DIR}/sparrow/sparrow_version.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/variable_size_binary_layout.hpp ) add_library(sparrow INTERFACE) diff --git a/include/sparrow/variable_size_binary_layout.hpp b/include/sparrow/variable_size_binary_layout.hpp new file mode 100644 index 00000000..f171efe0 --- /dev/null +++ b/include/sparrow/variable_size_binary_layout.hpp @@ -0,0 +1,93 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or mplied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "sparrow/array_data.hpp" +#include "sparrow/iterator.hpp" + +namespace sparrow +{ + template + class variable_size_binary_layout + { + public: + + using self_type = variable_size_binary_layout; + using inner_value_type = T; + using inner_const_reference = CR; + using value_type = std::optional; + using const_reference = const_reference_proxy; + using size_type = std::size_t; + + explicit variable_size_binary_layout(array_data data); + + size_type size() const; + const_reference operator[](size_type i) const; + + private: + + using sequence_value_type = typename T::value_type; + + bool has_value(size_type i) const; + inner_const_reference value(size_type i) const; + + // We use the bitmap and the first two buffers + // The first buffer contains the offsets of + // the elements in the second buffer + array_data m_data; + + friend class const_reference_proxy; + }; + + /********************************************** + * variable_size_binary_layout implementation * + **********************************************/ + + template + variable_size_binary_layout::variable_size_binary_layout(array_data data) + : m_data(std::move(data)) + { + assert(m_data.buffers.size() == 2u); + } + + template + auto variable_size_binary_layout::size() const -> size_type + { + return static_cast(m_data.length - m_data.offset); + } + + template + auto variable_size_binary_layout::operator[](size_type i) const -> const_reference + { + return const_reference(*this, i); + } + + template + auto variable_size_binary_layout::has_value(size_type i) const -> bool + { + return m_data.bitmap.test(i + m_data.offset); + } + + template + auto variable_size_binary_layout::value(size_type i) const -> inner_const_reference + { + const auto& offsets = m_data.buffers[0]; + size_type first_index = offsets[m_data.offset + i]; + size_type last_index = offsets[m_data.offset + i + 1]; + const auto* seq = m_data.buffers[1].data(); + return CR(seq + first_index, seq + last_index); + } +} + diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 643ceb15..82e9d051 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -40,6 +40,7 @@ set(SPARROW_TESTS_SOURCES test_dynamic_bitset.cpp test_iterator.cpp test_layout.cpp + test_variable_size_binary_layout.cpp ) set(test_target "test_sparrow_lib") add_executable(${test_target} ${SPARROW_TESTS_SOURCES}) diff --git a/test/test_variable_size_binary_layout.cpp b/test/test_variable_size_binary_layout.cpp new file mode 100644 index 00000000..3db03824 --- /dev/null +++ b/test/test_variable_size_binary_layout.cpp @@ -0,0 +1,83 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "doctest/doctest.h" + +#include +#include + +#include "sparrow/array_data.hpp" +#include "sparrow/variable_size_binary_layout.hpp" + +#include + +namespace sparrow +{ + struct vs_binary_fixture + { + vs_binary_fixture() + { + m_data.buffers.resize(2); + m_data.buffers[0].resize(nb_words + 1); + m_data.buffers[1].resize(std::accumulate( + words, words + nb_words, 0u, [](std::size_t res, const auto& s) { return res + s.size(); } + )); + m_data.buffers[0][0] = 0u; + auto iter = m_data.buffers[1].begin(); + for (size_t i = 0; i < nb_words; ++i) + { + m_data.buffers[0][i+1] = m_data.buffers[0][i] + words[i].size(); + std::copy(words[i].cbegin(), words[i].cend(), iter); + iter += words[i].size(); + } + + m_data.length = 4; + m_data.offset = 1; + } + + static constexpr size_t nb_words = 4u; + static constexpr std::string words[nb_words] = + { + "you", + "are", + "not", + "prepared" + }; + + array_data m_data; + using layout_type = variable_size_binary_layout; + }; + + TEST_SUITE("variable_size_binary_layout") + { + TEST_CASE_FIXTURE(vs_binary_fixture, "size") + { + layout_type l(m_data); + CHECK_EQ(l.size(), m_data.length - m_data.offset); + } + + TEST_CASE_FIXTURE(vs_binary_fixture, "operator[]") + { + layout_type l(m_data); + auto cref0 = l[0]; + auto cref1 = l[1]; + auto cref2 = l[2]; + + std::cout << cref0.value() << std::endl; + CHECK_EQ(cref0.value(), words[1]); + CHECK_EQ(cref1.value(), words[2]); + CHECK_EQ(cref2.value(), words[3]); + } + } +} From 34a60fbc39dc2af2529be4c25f4bdede7d86bed4 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Thu, 21 Mar 2024 06:45:51 +0100 Subject: [PATCH 02/14] Please allow me to introduce myself --- include/sparrow/mp_utils.hpp | 1 + .../sparrow/variable_size_binary_layout.hpp | 200 ++++++++++++++++-- test/test_variable_size_binary_layout.cpp | 43 +++- 3 files changed, 221 insertions(+), 23 deletions(-) diff --git a/include/sparrow/mp_utils.hpp b/include/sparrow/mp_utils.hpp index 70260c34..34361cc8 100644 --- a/include/sparrow/mp_utils.hpp +++ b/include/sparrow/mp_utils.hpp @@ -28,5 +28,6 @@ namespace sparrow template using constify_t = typename constify::type; + } } diff --git a/include/sparrow/variable_size_binary_layout.hpp b/include/sparrow/variable_size_binary_layout.hpp index f171efe0..861d4754 100644 --- a/include/sparrow/variable_size_binary_layout.hpp +++ b/include/sparrow/variable_size_binary_layout.hpp @@ -19,75 +19,239 @@ namespace sparrow { - template + namespace impl + { + template + struct get_inner_reference + : std::conditional< + is_const, + typename C::inner_const_reference, + typename C::inner_reference + > + { + }; + + template + using get_inner_reference_t = typename get_inner_reference::type; + } + + template + class vs_binary_value_iterator : public iterator_base + < + vs_binary_value_iterator, + mpl::constify_t, + std::contiguous_iterator_tag, + impl::get_inner_reference_t + > + { + public: + + using self_type = vs_binary_value_iterator; + using base_type = iterator_base + < + self_type, + mpl::constify_t, + std::contiguous_iterator_tag, + impl::get_inner_reference_t + >; + using reference = typename base_type::reference; + using difference_type = typename base_type::difference_type; + + using offset_iterator = std::conditional_t< + is_const, typename L::const_offset_iterator, typename L::offset_iterator + >; + using data_iterator = std::conditional_t< + is_const, typename L::const_data_iterator, typename L::data_iterator + >; + + vs_binary_value_iterator() noexcept = default; + vs_binary_value_iterator( + offset_iterator offset_it, + data_iterator data_begin + ); + + private: + + reference dereference() const; + void increment(); + void decrement(); + void advance(difference_type n); + difference_type distance_to(const self_type& rhs) const; + bool equal(const self_type& rhs) const; + bool less_than(const self_type& rhs) const; + + offset_iterator m_offset_it; + const data_iterator m_data_begin; + + friend class iterator_access; + }; + + template class variable_size_binary_layout { public: - using self_type = variable_size_binary_layout; + using self_type = variable_size_binary_layout; using inner_value_type = T; + using inner_reference = R; using inner_const_reference = CR; using value_type = std::optional; using const_reference = const_reference_proxy; using size_type = std::size_t; + using const_value_iterator = vs_binary_value_iterator; + explicit variable_size_binary_layout(array_data data); size_type size() const; const_reference operator[](size_type i) const; + const_value_iterator value_cbegin() const; + const_value_iterator value_cend() const; + private: - using sequence_value_type = typename T::value_type; + using data_type = typename T::value_type; + using offset_iterator = OT*; + using const_offset_iterator = const OT*; + using data_iterator = data_type*; + using const_data_iterator = const data_type*; bool has_value(size_type i) const; inner_const_reference value(size_type i) const; + const_offset_iterator offset(size_type i) const; + const_offset_iterator offset_end() const; + const_data_iterator data(size_type i) const; + // We use the bitmap and the first two buffers // The first buffer contains the offsets of // the elements in the second buffer array_data m_data; friend class const_reference_proxy; + friend class vs_binary_value_iterator; }; + /******************************************* + * vs_binary_value_iterator implementation * + *******************************************/ + + template + vs_binary_value_iterator::vs_binary_value_iterator( + offset_iterator offset_it, + data_iterator data_begin + ) + : m_offset_it(offset_it) + , m_data_begin(data_begin) + { + } + + template + auto vs_binary_value_iterator::dereference() const -> reference + { + return reference(m_data_begin + *m_offset_it, m_data_begin + *(m_offset_it + 1)); + } + + template + void vs_binary_value_iterator::increment() + { + ++m_offset_it; + } + + template + void vs_binary_value_iterator::decrement() + { + --m_offset_it; + } + + template + void vs_binary_value_iterator::advance(difference_type n) + { + m_offset_it += n; + } + + template + auto vs_binary_value_iterator::distance_to(const self_type& rhs) const -> difference_type + { + return rhs.m_offset_it - m_offset_it; + } + + template + bool vs_binary_value_iterator::equal(const self_type& rhs) const + { + return m_offset_it == rhs.m_offset_it; + } + + template + bool vs_binary_value_iterator::less_than(const self_type& rhs) const + { + return m_offset_it < rhs.m_offset_it; + } + /********************************************** * variable_size_binary_layout implementation * **********************************************/ - template - variable_size_binary_layout::variable_size_binary_layout(array_data data) + template + variable_size_binary_layout::variable_size_binary_layout(array_data data) : m_data(std::move(data)) { assert(m_data.buffers.size() == 2u); } - template - auto variable_size_binary_layout::size() const -> size_type + template + auto variable_size_binary_layout::size() const -> size_type { return static_cast(m_data.length - m_data.offset); } - template - auto variable_size_binary_layout::operator[](size_type i) const -> const_reference + template + auto variable_size_binary_layout::operator[](size_type i) const -> const_reference { return const_reference(*this, i); } - template - auto variable_size_binary_layout::has_value(size_type i) const -> bool + template + auto variable_size_binary_layout::value_cbegin() const -> const_value_iterator + { + return const_value_iterator(offset(0u), data(0u)); + } + + template + auto variable_size_binary_layout::value_cend() const -> const_value_iterator + { + return const_value_iterator(offset_end(), data(0u)); + } + + template + auto variable_size_binary_layout::has_value(size_type i) const -> bool { return m_data.bitmap.test(i + m_data.offset); } - template - auto variable_size_binary_layout::value(size_type i) const -> inner_const_reference + template + auto variable_size_binary_layout::value(size_type i) const -> inner_const_reference + { + return inner_const_reference(data(*offset(i)), data(*offset(i+1))); + } + + template + auto variable_size_binary_layout::offset(size_type i) const -> const_offset_iterator + { + return m_data.buffers[0].data() + m_data.offset + i; + } + + template + auto variable_size_binary_layout::offset_end() const -> const_offset_iterator + { + return m_data.buffers[0].data() + m_data.length; + } + + template + auto variable_size_binary_layout::data(size_type i) const -> const_data_iterator { - const auto& offsets = m_data.buffers[0]; - size_type first_index = offsets[m_data.offset + i]; - size_type last_index = offsets[m_data.offset + i + 1]; - const auto* seq = m_data.buffers[1].data(); - return CR(seq + first_index, seq + last_index); + return m_data.buffers[1].data() + i; } } diff --git a/test/test_variable_size_binary_layout.cpp b/test/test_variable_size_binary_layout.cpp index 3db03824..c9131bc0 100644 --- a/test/test_variable_size_binary_layout.cpp +++ b/test/test_variable_size_binary_layout.cpp @@ -29,15 +29,15 @@ namespace sparrow vs_binary_fixture() { m_data.buffers.resize(2); - m_data.buffers[0].resize(nb_words + 1); + m_data.buffers[0].resize(sizeof(std::int64_t) * (nb_words + 1)); m_data.buffers[1].resize(std::accumulate( words, words + nb_words, 0u, [](std::size_t res, const auto& s) { return res + s.size(); } )); - m_data.buffers[0][0] = 0u; + m_data.buffers[0].data()[0] = 0u; auto iter = m_data.buffers[1].begin(); for (size_t i = 0; i < nb_words; ++i) { - m_data.buffers[0][i+1] = m_data.buffers[0][i] + words[i].size(); + offset()[i+1] = offset()[i] + words[i].size(); std::copy(words[i].cbegin(), words[i].cend(), iter); iter += words[i].size(); } @@ -56,11 +56,26 @@ namespace sparrow }; array_data m_data; - using layout_type = variable_size_binary_layout; + using layout_type = variable_size_binary_layout; + + private: + + std::int64_t* offset() + { + return m_data.buffers[0].data(); + } }; TEST_SUITE("variable_size_binary_layout") { + TEST_CASE_FIXTURE(vs_binary_fixture, "types") + { + static_assert(std::same_as); + using const_value_iterator = layout_type::const_value_iterator; + static_assert(std::same_as); + static_assert(std::same_as); + } + TEST_CASE_FIXTURE(vs_binary_fixture, "size") { layout_type l(m_data); @@ -74,10 +89,28 @@ namespace sparrow auto cref1 = l[1]; auto cref2 = l[2]; - std::cout << cref0.value() << std::endl; CHECK_EQ(cref0.value(), words[1]); CHECK_EQ(cref1.value(), words[2]); CHECK_EQ(cref2.value(), words[3]); } + + TEST_CASE_FIXTURE(vs_binary_fixture, "const_value_iterator") + { + layout_type l(m_data); + auto cref0 = l[0]; + auto cref1 = l[1]; + auto cref2 = l[2]; + + auto iter = l.value_cbegin(); + CHECK_EQ(*iter, cref0.value()); + ++iter; + CHECK_EQ(*iter, cref1.value()); + --iter; + CHECK_EQ(*iter, cref0.value()); + iter += 2; + CHECK_EQ(*iter, cref2.value()); + ++iter; + CHECK_EQ(iter, l.value_cend()); + } } } From 7da32d89d0f1908f4ddc12d7c1cf554f3e6cdca6 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Thu, 21 Mar 2024 10:09:21 +0100 Subject: [PATCH 03/14] I'm a code of wealth and taste --- .../sparrow/variable_size_binary_layout.hpp | 57 +++++++++++++++++-- test/test_variable_size_binary_layout.cpp | 15 +++++ 2 files changed, 68 insertions(+), 4 deletions(-) diff --git a/include/sparrow/variable_size_binary_layout.hpp b/include/sparrow/variable_size_binary_layout.hpp index 861d4754..b44a4e3c 100644 --- a/include/sparrow/variable_size_binary_layout.hpp +++ b/include/sparrow/variable_size_binary_layout.hpp @@ -35,6 +35,15 @@ namespace sparrow using get_inner_reference_t = typename get_inner_reference::type; } + /* + * @class vs_binary_value_iterator + * + * @brief Iterator over the data values of a variable size binary + * layout. + * + * @tparam L the layout type + * @tparam is_const a boolean flag specifying whether this iterator is const. + */ template class vs_binary_value_iterator : public iterator_base < @@ -86,9 +95,35 @@ namespace sparrow friend class iterator_access; }; + /* + * @class variabe_size_binary_layout + * + * @brief Layout for arrays containing values consisting of a variable number of bytes. + * + * This layout is used to retrieve data in an array of values of a variable number of bytes + * (typically string objects). Values are stored contiguously in a data buffer, a single + * value is retrieved via an additional offset buffer, where each element is the beginning + * of the corresponinding value in the data buffer. + * + * Example: + * + * Let's consider the array ['please', 'allow', 'me', 'to', 'introduce', 'myself']. + * The internal buffers will be: + * - offset: [0, 6, 11, 13, 15, 24, 30] + * - data: ['p','l','e','a','s','e','a','l','l','o','w','m','e','t','o','i','n','t','r','o','d','u','c','e','m','y','s','e','l','f'] + * + * @tparam T the type of the data + * @tparam R the reference type to the data. This type is different form the reference type of the layout, + * which behaves like std::optional. + * @tparam CR the const reference type to the data. This type is different from the const reference of the layout, + * which behaves like std::optional. + * @tparam OT type of the offset values. Must be std::int64_t or std::int32_t + */ template class variable_size_binary_layout { + static_assert(std::same_as || std::same_as); + public: using self_type = variable_size_binary_layout; @@ -99,6 +134,7 @@ namespace sparrow using const_reference = const_reference_proxy; using size_type = std::size_t; + using const_bitmap_iterator = array_data::bitmap_type::const_iterator; using const_value_iterator = vs_binary_value_iterator; explicit variable_size_binary_layout(array_data data); @@ -109,6 +145,9 @@ namespace sparrow const_value_iterator value_cbegin() const; const_value_iterator value_cend() const; + const_bitmap_iterator bitmap_cbegin() const; + const_bitmap_iterator bitmap_cend() const; + private: using data_type = typename T::value_type; @@ -124,9 +163,6 @@ namespace sparrow const_offset_iterator offset_end() const; const_data_iterator data(size_type i) const; - // We use the bitmap and the first two buffers - // The first buffer contains the offsets of - // the elements in the second buffer array_data m_data; friend class const_reference_proxy; @@ -198,6 +234,7 @@ namespace sparrow : m_data(std::move(data)) { assert(m_data.buffers.size() == 2u); + assert(m_data.buffers[0].size() == 0u || m_data.buffers[0].back() == m_data.buffers[1].size()); } template @@ -211,6 +248,18 @@ namespace sparrow { return const_reference(*this, i); } + + template + auto variable_size_binary_layout::bitmap_cbegin() const -> const_bitmap_iterator + { + return m_data.bitmap.cbegin() + m_data.offset; + } + + template + auto variable_size_binary_layout::bitmap_cend() const -> const_bitmap_iterator + { + return m_data.bitmap.cend(); + } template auto variable_size_binary_layout::value_cbegin() const -> const_value_iterator @@ -223,7 +272,7 @@ namespace sparrow { return const_value_iterator(offset_end(), data(0u)); } - + template auto variable_size_binary_layout::has_value(size_type i) const -> bool { diff --git a/test/test_variable_size_binary_layout.cpp b/test/test_variable_size_binary_layout.cpp index c9131bc0..688424c9 100644 --- a/test/test_variable_size_binary_layout.cpp +++ b/test/test_variable_size_binary_layout.cpp @@ -28,6 +28,7 @@ namespace sparrow { vs_binary_fixture() { + m_data.bitmap.resize(nb_words); m_data.buffers.resize(2); m_data.buffers[0].resize(sizeof(std::int64_t) * (nb_words + 1)); m_data.buffers[1].resize(std::accumulate( @@ -40,8 +41,10 @@ namespace sparrow offset()[i+1] = offset()[i] + words[i].size(); std::copy(words[i].cbegin(), words[i].cend(), iter); iter += words[i].size(); + m_data.bitmap.set(i, true); } + m_data.bitmap.set(2, false); m_data.length = 4; m_data.offset = 1; } @@ -70,6 +73,7 @@ namespace sparrow { TEST_CASE_FIXTURE(vs_binary_fixture, "types") { + static_assert(std::same_as); static_assert(std::same_as); using const_value_iterator = layout_type::const_value_iterator; static_assert(std::same_as); @@ -112,5 +116,16 @@ namespace sparrow ++iter; CHECK_EQ(iter, l.value_cend()); } + + TEST_CASE_FIXTURE(vs_binary_fixture, "const_bitmap_iterator") + { + layout_type l(m_data); + auto iter = l.bitmap_cbegin(); + CHECK(*iter); + ++iter; + CHECK(!*iter); + iter += 2; + CHECK_EQ(iter, l.bitmap_cend()); + } } } From a62b12e16153e758c6275f6c3627667a721336ca Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Thu, 21 Mar 2024 10:26:46 +0100 Subject: [PATCH 04/14] Pleased to meet you, hope you guess typename ;)) --- include/sparrow/variable_size_binary_layout.hpp | 3 ++- test/test_variable_size_binary_layout.cpp | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/sparrow/variable_size_binary_layout.hpp b/include/sparrow/variable_size_binary_layout.hpp index b44a4e3c..63b65857 100644 --- a/include/sparrow/variable_size_binary_layout.hpp +++ b/include/sparrow/variable_size_binary_layout.hpp @@ -234,7 +234,8 @@ namespace sparrow : m_data(std::move(data)) { assert(m_data.buffers.size() == 2u); - assert(m_data.buffers[0].size() == 0u || m_data.buffers[0].back() == m_data.buffers[1].size()); + //TODO: templatize back and front in buffer + //assert(m_data.buffers[0].size() == 0u || m_data.buffers[0].back() == m_data.buffers[1].size()); } template diff --git a/test/test_variable_size_binary_layout.cpp b/test/test_variable_size_binary_layout.cpp index 688424c9..6832b568 100644 --- a/test/test_variable_size_binary_layout.cpp +++ b/test/test_variable_size_binary_layout.cpp @@ -59,6 +59,7 @@ namespace sparrow }; array_data m_data; + // TODO: replace R = std::string_view with specific reference proxy using layout_type = variable_size_binary_layout; private: @@ -94,7 +95,7 @@ namespace sparrow auto cref2 = l[2]; CHECK_EQ(cref0.value(), words[1]); - CHECK_EQ(cref1.value(), words[2]); + CHECK(!cref1.has_value()); CHECK_EQ(cref2.value(), words[3]); } @@ -108,7 +109,6 @@ namespace sparrow auto iter = l.value_cbegin(); CHECK_EQ(*iter, cref0.value()); ++iter; - CHECK_EQ(*iter, cref1.value()); --iter; CHECK_EQ(*iter, cref0.value()); iter += 2; From 48267402147a8f2aa1a9208fef909c2e2f559816 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Thu, 21 Mar 2024 10:31:26 +0100 Subject: [PATCH 05/14] ... --- test/test_variable_size_binary_layout.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_variable_size_binary_layout.cpp b/test/test_variable_size_binary_layout.cpp index 6832b568..90c15b33 100644 --- a/test/test_variable_size_binary_layout.cpp +++ b/test/test_variable_size_binary_layout.cpp @@ -50,7 +50,7 @@ namespace sparrow } static constexpr size_t nb_words = 4u; - static constexpr std::string words[nb_words] = + static constexpr std::string_view words[nb_words] = { "you", "are", From 2a3fc712163d9f6860cadf8798bf52e7386c91a2 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Thu, 21 Mar 2024 10:39:15 +0100 Subject: [PATCH 06/14] Make me green --- include/sparrow/variable_size_binary_layout.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/sparrow/variable_size_binary_layout.hpp b/include/sparrow/variable_size_binary_layout.hpp index 63b65857..b07e96f4 100644 --- a/include/sparrow/variable_size_binary_layout.hpp +++ b/include/sparrow/variable_size_binary_layout.hpp @@ -289,19 +289,19 @@ namespace sparrow template auto variable_size_binary_layout::offset(size_type i) const -> const_offset_iterator { - return m_data.buffers[0].data() + m_data.offset + i; + return m_data.buffers[0].template data() + m_data.offset + i; } template auto variable_size_binary_layout::offset_end() const -> const_offset_iterator { - return m_data.buffers[0].data() + m_data.length; + return m_data.buffers[0].template data() + m_data.length; } template auto variable_size_binary_layout::data(size_type i) const -> const_data_iterator { - return m_data.buffers[1].data() + i; + return m_data.buffers[1].template data() + i; } } From 990666841393903f445f1a4ada19948f42ccccf0 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Thu, 21 Mar 2024 15:46:22 +0100 Subject: [PATCH 07/14] Addressed review comments --- include/sparrow/mp_utils.hpp | 1 - include/sparrow/variable_size_binary_layout.hpp | 14 +++++++++----- test/test_variable_size_binary_layout.cpp | 16 +++++++--------- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/include/sparrow/mp_utils.hpp b/include/sparrow/mp_utils.hpp index 34361cc8..70260c34 100644 --- a/include/sparrow/mp_utils.hpp +++ b/include/sparrow/mp_utils.hpp @@ -28,6 +28,5 @@ namespace sparrow template using constify_t = typename constify::type; - } } diff --git a/include/sparrow/variable_size_binary_layout.hpp b/include/sparrow/variable_size_binary_layout.hpp index b07e96f4..6f812ca7 100644 --- a/include/sparrow/variable_size_binary_layout.hpp +++ b/include/sparrow/variable_size_binary_layout.hpp @@ -101,13 +101,14 @@ namespace sparrow * @brief Layout for arrays containing values consisting of a variable number of bytes. * * This layout is used to retrieve data in an array of values of a variable number of bytes - * (typically string objects). Values are stored contiguously in a data buffer, a single - * value is retrieved via an additional offset buffer, where each element is the beginning - * of the corresponinding value in the data buffer. + * (typically string objects). Values are stored contiguously in a data buffer (for instance + * a buffer of char if values are strings), a single value is retrieved via an additional + * offset buffer, where each element is the beginning of the corresponding value in the data + * buffer. * * Example: * - * Let's consider the array ['please', 'allow', 'me', 'to', 'introduce', 'myself']. + * Let's consider the array of string ['please', 'allow', 'me', 'to', 'introduce', 'myself']. * The internal buffers will be: * - offset: [0, 6, 11, 13, 15, 24, 30] * - data: ['p','l','e','a','s','e','a','l','l','o','w','m','e','t','o','i','n','t','r','o','d','u','c','e','m','y','s','e','l','f'] @@ -234,7 +235,7 @@ namespace sparrow : m_data(std::move(data)) { assert(m_data.buffers.size() == 2u); - //TODO: templatize back and front in buffer + //TODO: templatize back and front in buffer and uncomment the following line //assert(m_data.buffers[0].size() == 0u || m_data.buffers[0].back() == m_data.buffers[1].size()); } @@ -289,18 +290,21 @@ namespace sparrow template auto variable_size_binary_layout::offset(size_type i) const -> const_offset_iterator { + assert(!m_data.buffers.empty()); return m_data.buffers[0].template data() + m_data.offset + i; } template auto variable_size_binary_layout::offset_end() const -> const_offset_iterator { + assert(!m_data.buffers.empty()); return m_data.buffers[0].template data() + m_data.length; } template auto variable_size_binary_layout::data(size_type i) const -> const_data_iterator { + assert(!m_data.buffers.empty()); return m_data.buffers[1].template data() + i; } } diff --git a/test/test_variable_size_binary_layout.cpp b/test/test_variable_size_binary_layout.cpp index 90c15b33..0f0fbd99 100644 --- a/test/test_variable_size_binary_layout.cpp +++ b/test/test_variable_size_binary_layout.cpp @@ -66,21 +66,19 @@ namespace sparrow std::int64_t* offset() { + assert(!m_data.buffers.empty()); return m_data.buffers[0].data(); } + + static_assert(std::same_as); + static_assert(std::same_as); + using const_value_iterator = layout_type::const_value_iterator; + static_assert(std::same_as); + static_assert(std::same_as); }; TEST_SUITE("variable_size_binary_layout") { - TEST_CASE_FIXTURE(vs_binary_fixture, "types") - { - static_assert(std::same_as); - static_assert(std::same_as); - using const_value_iterator = layout_type::const_value_iterator; - static_assert(std::same_as); - static_assert(std::same_as); - } - TEST_CASE_FIXTURE(vs_binary_fixture, "size") { layout_type l(m_data); From a84fc8fff79edea5b8674ba8fe6ab050a082db0e Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Thu, 21 Mar 2024 15:47:16 +0100 Subject: [PATCH 08/14] typo --- include/sparrow/variable_size_binary_layout.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sparrow/variable_size_binary_layout.hpp b/include/sparrow/variable_size_binary_layout.hpp index 6f812ca7..d04737f4 100644 --- a/include/sparrow/variable_size_binary_layout.hpp +++ b/include/sparrow/variable_size_binary_layout.hpp @@ -96,7 +96,7 @@ namespace sparrow }; /* - * @class variabe_size_binary_layout + * @class variable_size_binary_layout * * @brief Layout for arrays containing values consisting of a variable number of bytes. * From c1076a42df55a3c047821e4a4004ee9a3327ccbe Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Thu, 21 Mar 2024 17:57:43 +0100 Subject: [PATCH 09/14] Rebased --- include/sparrow/array_data.hpp | 12 ++++++------ include/sparrow/variable_size_binary_layout.hpp | 4 +++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/include/sparrow/array_data.hpp b/include/sparrow/array_data.hpp index 8a441205..b5f09bb6 100644 --- a/include/sparrow/array_data.hpp +++ b/include/sparrow/array_data.hpp @@ -116,11 +116,11 @@ namespace sparrow using base_type = reference_proxy_base; using layout_type = L; using value_type = typename L::inner_value_type; - using reference = typename L::inner_const_reference; + using const_reference = typename L::inner_const_reference; using bitmap_reference = typename L::bitmap_const_reference; using size_type = typename L::size_type; - const_reference_proxy(reference val_ref, bitmap_reference bit_ref); + const_reference_proxy(const_reference val_ref, bitmap_reference bit_ref); ~const_reference_proxy() = default; const_reference_proxy(const self_type&) = default; @@ -129,11 +129,11 @@ namespace sparrow bool has_value() const; explicit operator bool() const; - const value_type& value() const; + const_reference value() const; private: - reference m_val_ref; + const_reference m_val_ref; bitmap_reference m_bit_ref; }; @@ -259,7 +259,7 @@ namespace sparrow ****************************************/ template - const_reference_proxy::const_reference_proxy(reference val_ref, bitmap_reference bit_ref) + const_reference_proxy::const_reference_proxy(const_reference val_ref, bitmap_reference bit_ref) : m_val_ref(val_ref) , m_bit_ref(bit_ref) { @@ -278,7 +278,7 @@ namespace sparrow } template - auto const_reference_proxy::value() const -> const value_type& + auto const_reference_proxy::value() const -> const_reference { assert(has_value()); return m_val_ref; diff --git a/include/sparrow/variable_size_binary_layout.hpp b/include/sparrow/variable_size_binary_layout.hpp index d04737f4..df0dd66e 100644 --- a/include/sparrow/variable_size_binary_layout.hpp +++ b/include/sparrow/variable_size_binary_layout.hpp @@ -131,6 +131,8 @@ namespace sparrow using inner_value_type = T; using inner_reference = R; using inner_const_reference = CR; + using bitmap_type = array_data::bitmap_type; + using bitmap_const_reference = typename bitmap_type::const_reference; using value_type = std::optional; using const_reference = const_reference_proxy; using size_type = std::size_t; @@ -248,7 +250,7 @@ namespace sparrow template auto variable_size_binary_layout::operator[](size_type i) const -> const_reference { - return const_reference(*this, i); + return const_reference(value(i), has_value(i)); } template From fed3e6f84965783b9e7bb535cfbcf3e7544e3eca Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Tue, 26 Mar 2024 06:09:08 +0100 Subject: [PATCH 10/14] Returns ranges for values and bitmap in layout --- .../sparrow/variable_size_binary_layout.hpp | 79 ++++++++++++------- test/test_variable_size_binary_layout.cpp | 10 ++- 2 files changed, 58 insertions(+), 31 deletions(-) diff --git a/include/sparrow/variable_size_binary_layout.hpp b/include/sparrow/variable_size_binary_layout.hpp index df0dd66e..322ef3f2 100644 --- a/include/sparrow/variable_size_binary_layout.hpp +++ b/include/sparrow/variable_size_binary_layout.hpp @@ -14,6 +14,7 @@ #pragma once +#include #include "sparrow/array_data.hpp" #include "sparrow/iterator.hpp" @@ -35,7 +36,10 @@ namespace sparrow using get_inner_reference_t = typename get_inner_reference::type; } - /* + template + concept layout_offset = std::same_as || std::same_as; + + /** * @class vs_binary_value_iterator * * @brief Iterator over the data values of a variable size binary @@ -90,7 +94,7 @@ namespace sparrow bool less_than(const self_type& rhs) const; offset_iterator m_offset_it; - const data_iterator m_data_begin; + data_iterator m_data_begin; friend class iterator_access; }; @@ -114,17 +118,15 @@ namespace sparrow * - data: ['p','l','e','a','s','e','a','l','l','o','w','m','e','t','o','i','n','t','r','o','d','u','c','e','m','y','s','e','l','f'] * * @tparam T the type of the data - * @tparam R the reference type to the data. This type is different form the reference type of the layout, + * @tparam R the reference type to the data. This type is different from the reference type of the layout, * which behaves like std::optional. * @tparam CR the const reference type to the data. This type is different from the const reference of the layout, * which behaves like std::optional. - * @tparam OT type of the offset values. Must be std::int64_t or std::int32_t + * @tparam OT type of the offset values. Must be std::int64_t or std::int32_t. */ - template + template class variable_size_binary_layout { - static_assert(std::same_as || std::same_as); - public: using self_type = variable_size_binary_layout; @@ -137,28 +139,39 @@ namespace sparrow using const_reference = const_reference_proxy; using size_type = std::size_t; + /** + * These types have to be public to be accessible when + * instantiating const_value_iterator for checking the + * requirements of subrange. + */ + using data_type = typename T::value_type; + using offset_iterator = OT*; + using const_offset_iterator = const OT*; + using data_iterator = data_type*; + using const_data_iterator = const data_type*; + using const_bitmap_iterator = array_data::bitmap_type::const_iterator; using const_value_iterator = vs_binary_value_iterator; + using const_bitmap_range = std::ranges::subrange; + using const_value_range = std::ranges::subrange; + explicit variable_size_binary_layout(array_data data); size_type size() const; const_reference operator[](size_type i) const; + const_bitmap_range bitmap() const; + const_value_range values() const; + + private: + const_value_iterator value_cbegin() const; const_value_iterator value_cend() const; const_bitmap_iterator bitmap_cbegin() const; const_bitmap_iterator bitmap_cend() const; - private: - - using data_type = typename T::value_type; - using offset_iterator = OT*; - using const_offset_iterator = const OT*; - using data_iterator = data_type*; - using const_data_iterator = const data_type*; - bool has_value(size_type i) const; inner_const_reference value(size_type i) const; @@ -232,7 +245,7 @@ namespace sparrow * variable_size_binary_layout implementation * **********************************************/ - template + template variable_size_binary_layout::variable_size_binary_layout(array_data data) : m_data(std::move(data)) { @@ -241,69 +254,81 @@ namespace sparrow //assert(m_data.buffers[0].size() == 0u || m_data.buffers[0].back() == m_data.buffers[1].size()); } - template + template auto variable_size_binary_layout::size() const -> size_type { return static_cast(m_data.length - m_data.offset); } - template + template auto variable_size_binary_layout::operator[](size_type i) const -> const_reference { return const_reference(value(i), has_value(i)); } - template + template + auto variable_size_binary_layout::bitmap() const -> const_bitmap_range + { + return std::ranges::subrange(bitmap_cbegin(), bitmap_cend()); + } + + template + auto variable_size_binary_layout::values() const -> const_value_range + { + return std::ranges::subrange(value_cbegin(), value_cend()); + } + + template auto variable_size_binary_layout::bitmap_cbegin() const -> const_bitmap_iterator { return m_data.bitmap.cbegin() + m_data.offset; } - template + template auto variable_size_binary_layout::bitmap_cend() const -> const_bitmap_iterator { return m_data.bitmap.cend(); } - template + template auto variable_size_binary_layout::value_cbegin() const -> const_value_iterator { return const_value_iterator(offset(0u), data(0u)); } - template + template auto variable_size_binary_layout::value_cend() const -> const_value_iterator { return const_value_iterator(offset_end(), data(0u)); } - template + template auto variable_size_binary_layout::has_value(size_type i) const -> bool { return m_data.bitmap.test(i + m_data.offset); } - template + template auto variable_size_binary_layout::value(size_type i) const -> inner_const_reference { return inner_const_reference(data(*offset(i)), data(*offset(i+1))); } - template + template auto variable_size_binary_layout::offset(size_type i) const -> const_offset_iterator { assert(!m_data.buffers.empty()); return m_data.buffers[0].template data() + m_data.offset + i; } - template + template auto variable_size_binary_layout::offset_end() const -> const_offset_iterator { assert(!m_data.buffers.empty()); return m_data.buffers[0].template data() + m_data.length; } - template + template auto variable_size_binary_layout::data(size_type i) const -> const_data_iterator { assert(!m_data.buffers.empty()); diff --git a/test/test_variable_size_binary_layout.cpp b/test/test_variable_size_binary_layout.cpp index 0f0fbd99..87d4275e 100644 --- a/test/test_variable_size_binary_layout.cpp +++ b/test/test_variable_size_binary_layout.cpp @@ -104,7 +104,8 @@ namespace sparrow auto cref1 = l[1]; auto cref2 = l[2]; - auto iter = l.value_cbegin(); + auto vrange = l.values(); + auto iter = vrange.begin(); CHECK_EQ(*iter, cref0.value()); ++iter; --iter; @@ -112,18 +113,19 @@ namespace sparrow iter += 2; CHECK_EQ(*iter, cref2.value()); ++iter; - CHECK_EQ(iter, l.value_cend()); + CHECK_EQ(iter, vrange.end()); } TEST_CASE_FIXTURE(vs_binary_fixture, "const_bitmap_iterator") { layout_type l(m_data); - auto iter = l.bitmap_cbegin(); + auto brange = l.bitmap(); + auto iter = brange.begin(); CHECK(*iter); ++iter; CHECK(!*iter); iter += 2; - CHECK_EQ(iter, l.bitmap_cend()); + CHECK_EQ(iter, brange.end()); } } } From bdd911eeda40065b0f50f872402775ce7ed668a8 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Tue, 26 Mar 2024 06:10:55 +0100 Subject: [PATCH 11/14] Adddressed leftover review comment --- include/sparrow/variable_size_binary_layout.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sparrow/variable_size_binary_layout.hpp b/include/sparrow/variable_size_binary_layout.hpp index 322ef3f2..1838931b 100644 --- a/include/sparrow/variable_size_binary_layout.hpp +++ b/include/sparrow/variable_size_binary_layout.hpp @@ -117,7 +117,7 @@ namespace sparrow * - offset: [0, 6, 11, 13, 15, 24, 30] * - data: ['p','l','e','a','s','e','a','l','l','o','w','m','e','t','o','i','n','t','r','o','d','u','c','e','m','y','s','e','l','f'] * - * @tparam T the type of the data + * @tparam T the type of the data stored in the data buffer, not its byt representation. * @tparam R the reference type to the data. This type is different from the reference type of the layout, * which behaves like std::optional. * @tparam CR the const reference type to the data. This type is different from the const reference of the layout, From 45282532ffefced23fa8bfb5bbf2e775b09ac33a Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Tue, 26 Mar 2024 06:15:14 +0100 Subject: [PATCH 12/14] Squash me, I'm famous --- include/sparrow/array_data.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/sparrow/array_data.hpp b/include/sparrow/array_data.hpp index b5f09bb6..1fe42d79 100644 --- a/include/sparrow/array_data.hpp +++ b/include/sparrow/array_data.hpp @@ -117,10 +117,10 @@ namespace sparrow using layout_type = L; using value_type = typename L::inner_value_type; using const_reference = typename L::inner_const_reference; - using bitmap_reference = typename L::bitmap_const_reference; + using bitmap_const_reference = typename L::bitmap_const_reference; using size_type = typename L::size_type; - const_reference_proxy(const_reference val_ref, bitmap_reference bit_ref); + const_reference_proxy(const_reference val_ref, bitmap_const_reference bit_ref); ~const_reference_proxy() = default; const_reference_proxy(const self_type&) = default; @@ -134,7 +134,7 @@ namespace sparrow private: const_reference m_val_ref; - bitmap_reference m_bit_ref; + bitmap_const_reference m_bit_ref; }; /** @@ -259,7 +259,7 @@ namespace sparrow ****************************************/ template - const_reference_proxy::const_reference_proxy(const_reference val_ref, bitmap_reference bit_ref) + const_reference_proxy::const_reference_proxy(const_reference val_ref, bitmap_const_reference bit_ref) : m_val_ref(val_ref) , m_bit_ref(bit_ref) { From b33e7305048de4fde04acfa001832d7e02608fb5 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Tue, 26 Mar 2024 14:57:12 +0100 Subject: [PATCH 13/14] Squash me baby one more time! --- include/sparrow/variable_size_binary_layout.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/sparrow/variable_size_binary_layout.hpp b/include/sparrow/variable_size_binary_layout.hpp index 1838931b..575eec7c 100644 --- a/include/sparrow/variable_size_binary_layout.hpp +++ b/include/sparrow/variable_size_binary_layout.hpp @@ -117,7 +117,7 @@ namespace sparrow * - offset: [0, 6, 11, 13, 15, 24, 30] * - data: ['p','l','e','a','s','e','a','l','l','o','w','m','e','t','o','i','n','t','r','o','d','u','c','e','m','y','s','e','l','f'] * - * @tparam T the type of the data stored in the data buffer, not its byt representation. + * @tparam T the type of the data stored in the data buffer, not its byte representation. * @tparam R the reference type to the data. This type is different from the reference type of the layout, * which behaves like std::optional. * @tparam CR the const reference type to the data. This type is different from the const reference of the layout, @@ -257,12 +257,14 @@ namespace sparrow template auto variable_size_binary_layout::size() const -> size_type { + assert(m_data.offset <= m_data.lenght); return static_cast(m_data.length - m_data.offset); } template auto variable_size_binary_layout::operator[](size_type i) const -> const_reference { + assert(i < size()); return const_reference(value(i), has_value(i)); } From 93f7405844aaa58b773c490874ac4cc32253c3f4 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Tue, 26 Mar 2024 15:13:55 +0100 Subject: [PATCH 14/14] .. --- include/sparrow/variable_size_binary_layout.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sparrow/variable_size_binary_layout.hpp b/include/sparrow/variable_size_binary_layout.hpp index 575eec7c..36f93f27 100644 --- a/include/sparrow/variable_size_binary_layout.hpp +++ b/include/sparrow/variable_size_binary_layout.hpp @@ -257,7 +257,7 @@ namespace sparrow template auto variable_size_binary_layout::size() const -> size_type { - assert(m_data.offset <= m_data.lenght); + assert(m_data.offset <= m_data.length); return static_cast(m_data.length - m_data.offset); }