diff --git a/include/sparrow/variable_size_binary_layout.hpp b/include/sparrow/variable_size_binary_layout.hpp index 861d4754..b44a4e3c 100644 --- a/include/sparrow/variable_size_binary_layout.hpp +++ b/include/sparrow/variable_size_binary_layout.hpp @@ -35,6 +35,15 @@ namespace sparrow using get_inner_reference_t = typename get_inner_reference::type; } + /* + * @class vs_binary_value_iterator + * + * @brief Iterator over the data values of a variable size binary + * layout. + * + * @tparam L the layout type + * @tparam is_const a boolean flag specifying whether this iterator is const. + */ template class vs_binary_value_iterator : public iterator_base < @@ -86,9 +95,35 @@ namespace sparrow friend class iterator_access; }; + /* + * @class variable_size_binary_layout + * + * @brief Layout for arrays containing values consisting of a variable number of bytes. + * + * This layout is used to retrieve data in an array of values of a variable number of bytes + * (typically string objects). Values are stored contiguously in a data buffer; a single + * value is retrieved via an additional offset buffer, where each element is the beginning + * of the corresponding value in the data buffer. + * + * Example: + * + * Let's consider the array ['please', 'allow', 'me', 'to', 'introduce', 'myself']. + * The internal buffers will be: + * - offset: [0, 6, 11, 13, 15, 24, 30] + * - data: ['p','l','e','a','s','e','a','l','l','o','w','m','e','t','o','i','n','t','r','o','d','u','c','e','m','y','s','e','l','f'] + * + * @tparam T the type of the data + * @tparam R the reference type to the data. This type is different from the reference type of the layout, + * which behaves like std::optional. + * @tparam CR the const reference type to the data. This type is different from the const reference of the layout, + * which behaves like std::optional. + * @tparam OT type of the offset values. 
Must be std::int64_t or std::int32_t + */ template class variable_size_binary_layout { + static_assert(std::same_as || std::same_as); + public: using self_type = variable_size_binary_layout; @@ -99,6 +134,7 @@ namespace sparrow using const_reference = const_reference_proxy; using size_type = std::size_t; + using const_bitmap_iterator = array_data::bitmap_type::const_iterator; using const_value_iterator = vs_binary_value_iterator; explicit variable_size_binary_layout(array_data data); @@ -109,6 +145,9 @@ namespace sparrow const_value_iterator value_cbegin() const; const_value_iterator value_cend() const; + const_bitmap_iterator bitmap_cbegin() const; + const_bitmap_iterator bitmap_cend() const; + private: using data_type = typename T::value_type; @@ -124,9 +163,6 @@ namespace sparrow const_offset_iterator offset_end() const; const_data_iterator data(size_type i) const; - // We use the bitmap and the first two buffers - // The first buffer contains the offsets of - // the elements in the second buffer array_data m_data; friend class const_reference_proxy; @@ -198,6 +234,7 @@ namespace sparrow : m_data(std::move(data)) { assert(m_data.buffers.size() == 2u); + assert(m_data.buffers[0].size() == 0u || m_data.buffers[0].back() == m_data.buffers[1].size()); } template @@ -211,6 +248,18 @@ namespace sparrow { return const_reference(*this, i); } + + template + auto variable_size_binary_layout::bitmap_cbegin() const -> const_bitmap_iterator + { + return m_data.bitmap.cbegin() + m_data.offset; + } + + template + auto variable_size_binary_layout::bitmap_cend() const -> const_bitmap_iterator + { + return m_data.bitmap.cend(); + } template auto variable_size_binary_layout::value_cbegin() const -> const_value_iterator @@ -223,7 +272,7 @@ namespace sparrow { return const_value_iterator(offset_end(), data(0u)); } - + template auto variable_size_binary_layout::has_value(size_type i) const -> bool { diff --git a/test/test_variable_size_binary_layout.cpp 
b/test/test_variable_size_binary_layout.cpp index c9131bc0..688424c9 100644 --- a/test/test_variable_size_binary_layout.cpp +++ b/test/test_variable_size_binary_layout.cpp @@ -28,6 +28,7 @@ namespace sparrow { vs_binary_fixture() { + m_data.bitmap.resize(nb_words); m_data.buffers.resize(2); m_data.buffers[0].resize(sizeof(std::int64_t) * (nb_words + 1)); m_data.buffers[1].resize(std::accumulate( @@ -40,8 +41,10 @@ namespace sparrow offset()[i+1] = offset()[i] + words[i].size(); std::copy(words[i].cbegin(), words[i].cend(), iter); iter += words[i].size(); + m_data.bitmap.set(i, true); } + m_data.bitmap.set(2, false); m_data.length = 4; m_data.offset = 1; } @@ -70,6 +73,7 @@ namespace sparrow { TEST_CASE_FIXTURE(vs_binary_fixture, "types") { + static_assert(std::same_as); static_assert(std::same_as); using const_value_iterator = layout_type::const_value_iterator; static_assert(std::same_as); @@ -112,5 +116,16 @@ namespace sparrow ++iter; CHECK_EQ(iter, l.value_cend()); } + + TEST_CASE_FIXTURE(vs_binary_fixture, "const_bitmap_iterator") + { + layout_type l(m_data); + auto iter = l.bitmap_cbegin(); + CHECK(*iter); + ++iter; + CHECK(!*iter); + iter += 2; + CHECK_EQ(iter, l.bitmap_cend()); + } } }