diff --git a/CMakeLists.txt b/CMakeLists.txt index a502ff5c..2c2796d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,11 @@ OPTION(BUILD_TESTS "sparrow test suite" OFF) # ===== set(SPARROW_HEADERS + ${SPARROW_INCLUDE_DIR}/sparrow/array_data.hpp ${SPARROW_INCLUDE_DIR}/sparrow/buffer.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/data_type.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/dynamic_bitset.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/iterator.hpp ${SPARROW_INCLUDE_DIR}/sparrow/sparrow_version.hpp ) diff --git a/include/sparrow/array_data.hpp b/include/sparrow/array_data.hpp new file mode 100644 index 00000000..be0b0492 --- /dev/null +++ b/include/sparrow/array_data.hpp @@ -0,0 +1,187 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include + +#include "sparrow/buffer.hpp" +#include "sparrow/dynamic_bitset.hpp" +#include "sparrow/data_type.hpp" + +namespace sparrow +{ + struct array_data + { + data_descriptor type; + std::int64_t length = 0; + std::int64_t offset = 0; + using block_type = std::uint8_t; + using bitmap_type = dynamic_bitset; + // bitmap buffer and null_count + bitmap_type bitmap; + // Other buffers + std::vector> buffers; + std::vector child_data; + }; + + struct null_type + { + }; + constexpr null_type null; + + template + class reference_proxy + { + public: + + using self_type = reference_proxy; + using value_type = T; + using reference = T&; + using const_reference = const T&; + using pointer = T*; + using size_type = std::size_t; + + reference_proxy(reference ref, size_type index, array_data& ad); + reference_proxy(const self_type&) = default; + reference_proxy(self_type&&) = default; + + self_type& operator=(const self_type&); + self_type& operator=(self_type&&); + self_type& operator=(null_type); + + template + self_type& operator=(const U&); + + template + self_type& operator+=(const U&); + + template + self_type& operator-=(const U&); + + template + self_type& operator*=(const U&); + + template + self_type& operator/=(const U&); + + operator const_reference() const; + + private: + + void update_value(value_type value); + void update_value(null_type); + + pointer p_ref; + size_type m_index; + array_data* p_array_data; + }; + + /********************************** + * reference_proxy implementation * + **********************************/ + + template + reference_proxy::reference_proxy(reference ref, size_type index, array_data& ad) + : p_ref(&ref) + , m_index(index) + , p_array_data(&ad) + { + } + + template + auto reference_proxy::operator=(const self_type& rhs) -> self_type& + { + update_value(*rhs.p_ref); + return *this; + } + + template + auto reference_proxy::operator=(self_type&& rhs) -> self_type& + { + 
update_value(*rhs.p_ref); + return *this; + } + + template + auto reference_proxy::operator=(null_type rhs) -> self_type& + { + update_value(std::move(rhs)); + return *this; + } + + template + template + auto reference_proxy::operator=(const U& u) -> self_type& + { + update_value(u); + return *this; + } + + template + template + auto reference_proxy::operator+=(const U& u) -> self_type& + { + update_value(*p_ref + u); + return *this; + } + + + template + template + auto reference_proxy::operator-=(const U& u) -> self_type& + { + update_value(*p_ref - u); + return *this; + } + + template + template + auto reference_proxy::operator*=(const U& u) -> self_type& + { + update_value(*p_ref * u); + return *this; + } + + template + template + auto reference_proxy::operator/=(const U& u) -> self_type& + { + update_value(*p_ref / u); + return *this; + } + + template + reference_proxy::operator const_reference() const + { + return *p_ref; + } + + template + void reference_proxy::update_value(value_type value) + { + auto& bitmap = p_array_data->bitmap; + bitmap.set(m_index, true); + *p_ref = value; + } + + template + void reference_proxy::update_value(null_type) + { + auto& bitmap = p_array_data->bitmap; + bitmap.set(m_index, false); + } +} + diff --git a/include/sparrow/buffer.hpp b/include/sparrow/buffer.hpp index 8b7f5ffc..68ebe9d1 100644 --- a/include/sparrow/buffer.hpp +++ b/include/sparrow/buffer.hpp @@ -26,12 +26,23 @@ namespace sparrow struct buffer_data { using value_type = T; + using reference = T&; + using const_reference = const T&; using pointer = T*; using size_type = std::size_t; bool empty() const noexcept; size_type size() const noexcept; + reference operator[](size_type); + const_reference operator[](size_type) const; + + reference front(); + const_reference front() const; + + reference back(); + const_reference back() const; + template U* data() noexcept; @@ -62,6 +73,7 @@ namespace sparrow buffer() = default; explicit buffer(size_type size); + 
buffer(size_type size, value_type value); buffer(pointer data, size_type size); ~buffer(); @@ -74,9 +86,14 @@ namespace sparrow using base_type::empty; using base_type::size; + using base_type::operator[]; + using base_type::front; + using base_type::back; using base_type::data; void resize(size_type new_size); + void resize(size_type new_size, value_type value); + void clear(); void swap(buffer&) noexcept; bool equal(const buffer& rhs) const; @@ -109,6 +126,9 @@ namespace sparrow using base_type::empty; using base_type::size; + using base_type::operator[]; + using base_type::front; + using base_type::back; using base_type::data; void swap(buffer_view&) noexcept; @@ -136,6 +156,42 @@ namespace sparrow return m_size; } + template + auto buffer_data::operator[](size_type pos) -> reference + { + return data()[pos]; + } + + template + auto buffer_data::operator[](size_type pos) const -> const_reference + { + return data()[pos]; + } + + template + auto buffer_data::front() -> reference + { + return data()[0]; + } + + template + auto buffer_data::front() const -> const_reference + { + return data()[0]; + } + + template + auto buffer_data::back() -> reference + { + return data()[m_size - 1]; + } + + template + auto buffer_data::back() const -> const_reference + { + return data()[m_size - 1]; + } + template template U* buffer_data::data() noexcept @@ -174,6 +230,13 @@ namespace sparrow { } + template + buffer::buffer(size_type size, value_type value) + : base_type{allocate(size), size} + { + std::fill(data(), data() + size, value); + } + template buffer::buffer(pointer data, size_type size) : base_type{data, size} @@ -188,7 +251,7 @@ namespace sparrow template buffer::buffer(const buffer& rhs) - : base_type{allocate(rhs.m_size), rhs.size()} + : base_type{allocate(rhs.size()), rhs.size()} { std::copy(rhs.data(), rhs.data() + rhs.size(), data()); } @@ -226,11 +289,29 @@ namespace sparrow if (n != size()) { buffer tmp(n); - std::copy(data(), data() + size(), tmp.data()); + 
size_type copy_size = std::min(size(), n); + std::copy(data(), data() + copy_size, tmp.data()); swap(tmp); } } + template + void buffer::resize(size_type n, value_type value) + { + size_type old_size = size(); + resize(n); + if (old_size < n) + { + std::fill(data() + old_size, data() + n, value); + } + } + + template + void buffer::clear() + { + resize(size_type(0)); + } + template void buffer::swap(buffer& rhs) noexcept { diff --git a/include/sparrow/data_type.hpp b/include/sparrow/data_type.hpp new file mode 100644 index 00000000..37cb41c0 --- /dev/null +++ b/include/sparrow/data_type.hpp @@ -0,0 +1,62 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +namespace sparrow +{ + // TODO: does not support all types specified by the Arrow specification + // yet + enum class data_type + { + NA = 0, + BOOL, + UINT8, + INT8, + UINT16, + INT16, + UINT32, + INT32, + UINT64, + INT64, + HALF_FLOAT, + FLOAT, + DOUBLE, + // UTF8 variable-length string + STRING, + // Variable-length bytes (no guarantee of UTF8-ness) + BINARY, + // Fixed-size binary. 
Each value occupies the same number of bytes + FIXED_SIZE_BINARY + }; + + // For now, a tiny wrapper around data_type + // More data and functions to come + class data_descriptor + { + public: + + constexpr explicit data_descriptor(data_type id = data_type::UINT8) + : m_id(id) + { + } + + constexpr data_type id() const { return m_id; } + + private: + + data_type m_id; + }; +} + diff --git a/include/sparrow/dynamic_bitset.hpp b/include/sparrow/dynamic_bitset.hpp new file mode 100644 index 00000000..9dd0b404 --- /dev/null +++ b/include/sparrow/dynamic_bitset.hpp @@ -0,0 +1,338 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include + +#include "sparrow/buffer.hpp" + +namespace sparrow +{ + template + class dynamic_bitset_base + { + public: + + using self_type = dynamic_bitset_base; + using storage_type = B; + using block_type = typename storage_type::value_type; + using value_type = bool; + using size_type = typename storage_type::size_type; + + size_type size() const noexcept; + size_type null_count() const noexcept; + + bool test(size_type pos) const; + void set(size_type pos, value_type value); + + block_type* data() noexcept; + const block_type* data() const noexcept; + size_type block_count() const noexcept; + + void swap(self_type&) noexcept; + + protected: + + dynamic_bitset_base(storage_type&& buffer, size_type size); + dynamic_bitset_base(storage_type&& buffer, size_type size, size_type null_count); + ~dynamic_bitset_base() = default; + + dynamic_bitset_base(const dynamic_bitset_base&) = default; + dynamic_bitset_base(dynamic_bitset_base&&) = default; + + dynamic_bitset_base& operator=(const dynamic_bitset_base&) = default; + dynamic_bitset_base& operator=(dynamic_bitset_base&&) = default; + + void resize(size_type n, value_type = false); + + size_type compute_block_count(size_type bits_count) const noexcept; + + private: + + size_type count_non_null() const noexcept; + size_type block_index(size_type pos) const noexcept; + size_type bit_index(size_type pos) const noexcept; + block_type bit_mask(size_type pos) const noexcept; + size_type count_extra_bits() const noexcept; + void zero_unused_bits(); + + static constexpr std::size_t s_bits_per_block = sizeof(block_type) * CHAR_BIT; + + storage_type m_buffer; + size_type m_size; + size_type m_null_count; + }; + + template + class dynamic_bitset : public dynamic_bitset_base> + { + public: + + using base_type = dynamic_bitset_base>; + using storage_type = typename base_type::storage_type; + using block_type = typename base_type::block_type; + using value_type = typename base_type::value_type; // bit value type (bool), same alias as the base class + using 
size_type = typename base_type::size_type; + + dynamic_bitset(); + explicit dynamic_bitset(size_type n, value_type v = false); + dynamic_bitset(block_type* p, size_type n); + dynamic_bitset(block_type* p, size_type n, size_type null_count); + + ~dynamic_bitset() = default; + dynamic_bitset(const dynamic_bitset&) = default; + dynamic_bitset(dynamic_bitset&&) = default; + + dynamic_bitset& operator=(const dynamic_bitset&) = default; + dynamic_bitset& operator=(dynamic_bitset&&) = default; + + using base_type::resize; + }; + + template + class dynamic_bitset_view : public dynamic_bitset_base> + { + public: + + using base_type = dynamic_bitset_base>; + using storage_type = typename base_type::storage_type; + using block_type = typename base_type::block_type; + using size_type = typename base_type::size_type; + + dynamic_bitset_view(block_type* p, size_type n); + dynamic_bitset_view(block_type* p, size_type n, size_type null_count); + ~dynamic_bitset_view() = default; + + dynamic_bitset_view(const dynamic_bitset_view&) = default; + dynamic_bitset_view(dynamic_bitset_view&&) = default; + + dynamic_bitset_view& operator=(const dynamic_bitset_view&) = default; + dynamic_bitset_view& operator=(dynamic_bitset_view&&) = default; + }; + + /************************************** + * dynamic_bitset_base implementation * + **************************************/ + + template + auto dynamic_bitset_base::size() const noexcept -> size_type + { + return m_size; + } + + template + auto dynamic_bitset_base::null_count() const noexcept -> size_type + { + return m_null_count; + } + + template + bool dynamic_bitset_base::test(size_type pos) const + { + return !m_null_count || m_buffer.data()[block_index(pos)] & bit_mask(pos); + } + + template + void dynamic_bitset_base::set(size_type pos, value_type value) + { + block_type& block = m_buffer.data()[block_index(pos)]; + bool old_value = block & bit_mask(pos); + if (value) + { + block |= bit_mask(pos); + if (m_null_count && !old_value) + { + 
--m_null_count; + } + } + else + { + block &= ~bit_mask(pos); + if (old_value) + { + ++m_null_count; + } + } + } + + template + auto dynamic_bitset_base::data() noexcept -> block_type* + { + return m_buffer.data(); + } + + template + auto dynamic_bitset_base::data() const noexcept -> const block_type* + { + return m_buffer.data(); + } + + template + auto dynamic_bitset_base::block_count() const noexcept -> size_type + { + return m_buffer.size(); + } + + template + void dynamic_bitset_base::swap(self_type& rhs) noexcept + { + using std::swap; + swap(m_buffer, rhs.m_buffer); + swap(m_size, rhs.m_size); + swap(m_null_count, rhs.m_null_count); + } + + template + dynamic_bitset_base::dynamic_bitset_base(storage_type&& buf, size_type size) + : m_buffer(std::move(buf)) + , m_size(size) + { + zero_unused_bits(); // clear padding bits first so count_non_null() only sees bits below m_size + m_null_count = m_size - count_non_null(); + } + + template + dynamic_bitset_base::dynamic_bitset_base(storage_type&& buf, size_type size, size_type null_count) + : m_buffer(std::move(buf)) + , m_size(size) + , m_null_count(null_count) + { + zero_unused_bits(); + assert(m_null_count == m_size - count_non_null()); + } + + template + auto dynamic_bitset_base::compute_block_count(size_type bits_count) const noexcept -> size_type + { + return bits_count / s_bits_per_block + + static_cast(bits_count % s_bits_per_block != 0); + } + + template + auto dynamic_bitset_base::count_non_null() const noexcept -> size_type + { + // Number of bits set to 1 in i for i from 0 to 255 + static constexpr unsigned char table[] = + { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 
4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 + }; + size_type res = 0; + const unsigned char* p = static_cast(static_cast(m_buffer.data())); // data() rather than &m_buffer[0]: no element access when the buffer holds zero blocks + size_type length = m_buffer.size() * sizeof(block_type); + for (size_type i = 0; i < length; ++i, ++p) + { + res += table[*p]; + } + return res; + } + + template + auto dynamic_bitset_base::block_index(size_type pos) const noexcept -> size_type + { + return pos / s_bits_per_block; + } + + template + auto dynamic_bitset_base::bit_index(size_type pos) const noexcept -> size_type + { + return pos % s_bits_per_block; + } + + template + auto dynamic_bitset_base::bit_mask(size_type pos) const noexcept -> block_type + { + return block_type(1) << bit_index(pos); + } + + template + auto dynamic_bitset_base::count_extra_bits() const noexcept -> size_type + { + return bit_index(size()); + } + + template + void dynamic_bitset_base::zero_unused_bits() + { + size_type extra_bits = count_extra_bits(); + if (extra_bits != 0) + { + m_buffer.back() &= ~(~block_type(0) << extra_bits); + } + } + + template + void dynamic_bitset_base::resize(size_type n, value_type b) + { + size_type old_block_count = m_buffer.size(); + size_type new_block_count = compute_block_count(n); + block_type value = b ? 
~block_type(0) : block_type(0); + + if (new_block_count != old_block_count) + { + m_buffer.resize(new_block_count, value); + } + + if (b && n > m_size) + { + size_type extra_bits = count_extra_bits(); + if (extra_bits > 0) + { + m_buffer.data()[old_block_count - 1] |= (value << extra_bits); + } + } + + m_size = n; + zero_unused_bits(); // clear padding bits before counting, otherwise bits at or above n are counted as non-null + m_null_count = m_size - count_non_null(); + } + + /********************************* + * dynamic_bitset implementation * + *********************************/ + + template + dynamic_bitset::dynamic_bitset() + : base_type(storage_type(), 0u) + { + } + + template + dynamic_bitset::dynamic_bitset(size_type n, value_type value) + : base_type( + storage_type(this->compute_block_count(n), value ? ~block_type(0) : 0), + n, + value ? 0u : n) + { + } + + template + dynamic_bitset::dynamic_bitset(block_type* p, size_type n) + : base_type(storage_type(p, this->compute_block_count(n)), n) + { + } + + template + dynamic_bitset::dynamic_bitset(block_type* p, size_type n, size_type null_count) + : base_type(storage_type(p, this->compute_block_count(n)), n, null_count) + { + } +} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 67ef49ae..62d3658e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -36,7 +36,9 @@ endif() set(SPARROW_TESTS main.cpp + test_array_data.cpp test_buffer.cpp + test_dynamic_bitset.cpp test_iterator.cpp ) set(test_target "test_sparrow_lib") diff --git a/test/test_array_data.cpp b/test/test_array_data.cpp new file mode 100644 index 00000000..a8e71c1c --- /dev/null +++ b/test/test_array_data.cpp @@ -0,0 +1,103 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "doctest/doctest.h" + +#include +#include +#include + +#include "sparrow/array_data.hpp" + +namespace sparrow +{ + struct array_data_fixture + { + array_data_fixture() + { + data_size = 16u; + data.bitmap.resize(data_size, true); + data.buffers.resize(1); + data.buffers[0].resize(data_size); + std::iota(data.buffers[0].data(), data.buffers[0].data() + data_size, 0u); + } + + array_data data; + std::size_t data_size; + + using reference_type = reference_proxy; + }; + + TEST_SUITE("reference_proxy") + { + TEST_CASE_FIXTURE(array_data_fixture, "value_semantic") + { + std::size_t index1 = 4u; + + SUBCASE("Constructor") + { + reference_type r(data.buffers[0][index1], index1, data); + CHECK_EQ(r, data.buffers[0][index1]); + } + + SUBCASE("Copy semantic") + { + std::size_t index2 = 12u; + reference_type r(data.buffers[0][index1], index1, data); + reference_type r2(r); + CHECK_EQ(r, r2); + + reference_type r3(data.buffers[0][index2], index2, data); + r2 = r3; + CHECK_EQ(r2, r3); + CHECK_EQ(data.buffers[0][index1], data.buffers[0][index2]); + } + + SUBCASE("Move semantic") + { + std::size_t index2 = 10u; + reference_type r(data.buffers[0][index1], index1, data); + reference_type r2(std::move(r)); + CHECK_EQ(r, r2); + + reference_type r3(data.buffers[0][index1], index1, data); + reference_type r4(data.buffers[0][index2], index2, data); + r3 = std::move(r4); + CHECK_EQ(r3, r4); + CHECK_EQ(data.buffers[0][index1], data.buffers[0][index2]); + } + } + + TEST_CASE_FIXTURE(array_data_fixture, "assignment") + { + std::size_t index = 4u; + reference_type 
r(data.buffers[0][index], index, data); + r = 2; + CHECK_EQ(data.buffers[0][index], 2); + + r += 3; + CHECK_EQ(data.buffers[0][index], 5); + + r -= 1; + CHECK_EQ(data.buffers[0][index], 4); + + r *= 3; + CHECK_EQ(data.buffers[0][index], 12); + + r /= 2; + CHECK_EQ(data.buffers[0][index], 6); + } + } +} + diff --git a/test/test_buffer.cpp b/test/test_buffer.cpp index 38bb02fc..71dd560c 100644 --- a/test/test_buffer.cpp +++ b/test/test_buffer.cpp @@ -60,6 +60,15 @@ namespace sparrow CHECK_EQ(b2.data(), mem); CHECK_EQ(b2.size(), expected_size); CHECK_EQ(b2.data()[2], uint8_t(2)); + + const uint8_t expected_value = 3; + buffer_test_type b3(expected_size, expected_value); + CHECK_NE(b3.data(), nullptr); + CHECK_EQ(b3.size(), expected_size); + for (std::size_t i = 0; i < expected_size; ++i) + { + CHECK_EQ(b3[i], expected_value); + } } TEST_CASE("copy semantic") @@ -156,6 +165,23 @@ namespace sparrow b.resize(size2); CHECK_EQ(b.size(), size2); CHECK_EQ(b.data()[2], 2); + + b.resize(size1); + CHECK_EQ(b.size(), size1); + CHECK_EQ(b.data()[2], 2); + + const std::size_t size3 = 6u; + b.resize(size3); + CHECK_EQ(b.size(), size3); + CHECK_EQ(b.data()[2], 2); + } + + TEST_CASE("clear") + { + const std::size_t size1 = 4u; + buffer_test_type b(make_test_buffer(size1), size1); + b.clear(); + CHECK_EQ(b.size(), 0u); } } diff --git a/test/test_dynamic_bitset.cpp b/test/test_dynamic_bitset.cpp new file mode 100644 index 00000000..6bd256e6 --- /dev/null +++ b/test/test_dynamic_bitset.cpp @@ -0,0 +1,176 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "doctest/doctest.h" + +#include +#include + +#include "sparrow/dynamic_bitset.hpp" + +namespace sparrow +{ + struct bitmap_fixture + { + bitmap_fixture() + { + p_buffer = new std::uint8_t[m_block_count]; + p_buffer[0] = 38; // 00100110 + p_buffer[1] = 85; // 01010101 + p_buffer[2] = 53; // 00110101 + p_buffer[3] = 7; // 00000111 + m_null_count = 15; // Last 3 bits of buffer[3] are unused + } + + std::uint8_t* p_buffer; + std::size_t m_block_count = 4; + std::size_t m_size = 29; + std::size_t m_null_count; + }; + + TEST_SUITE("dynamic_bitset") + { + using bitmap = dynamic_bitset; + + TEST_CASE("constructor") + { + bitmap b1; + CHECK_EQ(b1.size(), 0u); + CHECK_EQ(b1.null_count(), 0u); + + const std::size_t expected_size = 13; + bitmap b2(expected_size); + CHECK_EQ(b2.size(), expected_size); + CHECK_EQ(b2.null_count(), expected_size); + + bitmap b3(expected_size, true); + CHECK_EQ(b3.size(), expected_size); + CHECK_EQ(b3.null_count(), 0u); + + bitmap_fixture bf; + bitmap b4(bf.p_buffer, bf.m_size); + CHECK_EQ(b4.size(), bf.m_size); + CHECK_EQ(b4.null_count(), bf.m_null_count); + + bitmap_fixture bf2; + bitmap b5(bf2.p_buffer, bf2.m_size, bf2.m_null_count); + CHECK_EQ(b5.size(), bf.m_size); + CHECK_EQ(b5.null_count(), bf.m_null_count); + } + + TEST_CASE_FIXTURE(bitmap_fixture, "data") + { + bitmap b(p_buffer, m_size); + CHECK_EQ(b.data(), p_buffer); + + const bitmap& b2 = b; + CHECK_EQ(b2.data(), p_buffer); + } + + TEST_CASE_FIXTURE(bitmap_fixture, "copy semantic") + { + bitmap b(p_buffer, m_size); + bitmap b2(b); + + CHECK_EQ(b.size(), 
b2.size()); + CHECK_EQ(b.null_count(), b2.null_count()); + CHECK_NE(b.data(), b2.data()); + for (size_t i = 0; i < m_block_count; ++i) + { + CHECK_EQ(b.data()[i], b2.data()[i]); + } + + const std::size_t expected_block_count = 2; + std::uint8_t* buf = new std::uint8_t[expected_block_count]; + buf[0] = 37; + buf[1] = 2; + bitmap b3(buf, expected_block_count * 8); + + b2 = b3; + CHECK_EQ(b2.size() , b3.size()); + CHECK_EQ(b2.null_count(), b3.null_count()); + CHECK_NE(b2.data(), b3.data()); + for (size_t i = 0; i < expected_block_count; ++i) + { + CHECK_EQ(b2.data()[i], b3.data()[i]); + } + } + + TEST_CASE_FIXTURE(bitmap_fixture, "move semantic") + { + bitmap bref(p_buffer, m_size); + bitmap b(bref); + + bitmap b2(std::move(b)); + CHECK_EQ(b2.size(), bref.size()); + CHECK_EQ(b2.null_count(), bref.null_count()); + for (size_t i = 0; i < m_block_count; ++i) + { + CHECK_EQ(b2.data()[i], bref.data()[i]); + } + + const std::size_t expected_block_count = 2; + std::uint8_t* buf = new std::uint8_t[expected_block_count]; + buf[0] = 37; + buf[1] = 2; + bitmap b4(buf, expected_block_count * 8); + bitmap b5(b4); + + b2 = std::move(b4); + CHECK_EQ(b2.size(), b5.size()); + CHECK_EQ(b2.null_count(), b5.null_count()); + for (size_t i = 0; i < expected_block_count; ++i) + { + CHECK_EQ(b2.data()[i], b5.data()[i]); + } + } + + TEST_CASE_FIXTURE(bitmap_fixture, "test/set") + { + bitmap bm(p_buffer, m_size); + bool b1 = bm.test(2); + CHECK(b1); + bool b2 = bm.test(3); + CHECK(!b2); + bool b3 = bm.test(24); + CHECK(b3); + + bm.set(3, true); + CHECK_EQ(bm.data()[0], 46); + CHECK_EQ(bm.null_count(), m_null_count - 1); + + bm.set(24, 0); + CHECK_EQ(bm.data()[3], 6); + CHECK_EQ(bm.null_count(), m_null_count); + + bm.set(24, 0); + CHECK_EQ(bm.data()[3], 6); + CHECK_EQ(bm.null_count(), m_null_count); + + bm.set(2, true); + CHECK(bm.test(2)); + CHECK_EQ(bm.null_count(), m_null_count); + } + + TEST_CASE_FIXTURE(bitmap_fixture, "resize") + { + bitmap bref(p_buffer, m_size); + bitmap b(bref); + 
b.resize(33); + CHECK_EQ(b.size(), 33); + CHECK_EQ(b.null_count(), m_null_count + 4); + } + } +} +