From f5470df12d0c54cf5d446f4d854205795a28c3db Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Tue, 12 Mar 2024 15:15:00 +0100 Subject: [PATCH 1/7] Primitive layout Signed-off-by: Julien Jerphanion --- include/sparrow/array_data.hpp | 2 + include/sparrow/layout.hpp | 251 +++++++++++++++++++++++++++++++++ test/CMakeLists.txt | 1 + test/test_layout.cpp | 93 ++++++++++++ 4 files changed, 347 insertions(+) create mode 100644 include/sparrow/layout.hpp create mode 100644 test/test_layout.cpp diff --git a/include/sparrow/array_data.hpp b/include/sparrow/array_data.hpp index 141e58fd..e1f7ce62 100644 --- a/include/sparrow/array_data.hpp +++ b/include/sparrow/array_data.hpp @@ -14,6 +14,8 @@ #pragma once +#include + #include "sparrow/buffer.hpp" #include "sparrow/data_type.hpp" #include "sparrow/dynamic_bitset.hpp" diff --git a/include/sparrow/layout.hpp b/include/sparrow/layout.hpp new file mode 100644 index 00000000..c9bc03b7 --- /dev/null +++ b/include/sparrow/layout.hpp @@ -0,0 +1,251 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or mplied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include + +#include "sparrow/array_data.hpp" +#include "sparrow/iterator.hpp" +#include "sparrow/buffer.hpp" + +namespace sparrow +{ + template + class primitive_layout_iterator + : public iterator_base + < + primitive_layout_iterator, + mpl::constify_t, + std::contiguous_iterator_tag + > + { + public: + + using self_type = primitive_layout_iterator; + using base_type = iterator_base + < + self_type, + mpl::constify_t, + std::contiguous_iterator_tag + >; + using pointer = typename base_type::pointer; + + // Required so that std::ranges::end(p) is + // valid when p is a primitive_layout. + primitive_layout_iterator() = default; + explicit primitive_layout_iterator(pointer p); + + private: + + // TODO: use reference proxies with an API similar to `std::optional` instead. + using reference = typename base_type::reference; + using difference_type = typename base_type::difference_type; + using size_type = std::size_t; + + reference dereference() const; + void increment(); + void decrement(); + void advance(difference_type n); + difference_type distance_to(const self_type& rhs) const; + bool equal(const self_type& rhs) const; + bool less_than(const self_type& rhs) const; + + // We only use the first buffer and the bitmap. + pointer m_pointer = nullptr; + size_type m_index = 0; + + friend class iterator_access; + }; + + template + class primitive_layout + { + public: + + using self_type = primitive_layout; + using value_type = T; + using reference = T&; + using const_reference = const T&; + using pointer = T*; + using const_pointer = const T*; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + explicit primitive_layout(array_data p); + + using iterator = primitive_layout_iterator; + using const_iterator = primitive_layout_iterator; + + size_type size() const; + reference element(size_type i); + const_reference element(size_type i) const; + + iterator begin(); + iterator end(); + + const_iterator begin() const; + const_iterator end() const; + + const_iterator cbegin() const; + const_iterator cend() const ; + + private: + // We only use the first buffer and the bitmap. + array_data m_data; + + pointer data(); + const_pointer data() const; + + }; + + /******************************************** + * primitive_layout_iterator implementation * + *******************************************/ + + template + primitive_layout_iterator::primitive_layout_iterator(pointer pointer) + : m_pointer(pointer), m_index(0) + { + } + + template + auto primitive_layout_iterator::dereference() const -> reference + { + return m_pointer[m_index]; + } + + template + void primitive_layout_iterator::increment() + { + ++m_index; + } + + template + void primitive_layout_iterator::decrement() + { + --m_index; + } + + template + void primitive_layout_iterator::advance(difference_type n) + { + m_index += n; + } + + template + auto primitive_layout_iterator::distance_to(const self_type& rhs) const -> difference_type + { + return rhs.m_index - m_index; + } + + template + bool primitive_layout_iterator::equal(const self_type& rhs) const + { + return distance_to(rhs) == 0; + } + + template + bool primitive_layout_iterator::less_than(const self_type& rhs) const + { + return distance_to(rhs) > 0; + } + + /*********************************** + * primitive_layout implementation * + * ********************************/ + + template + primitive_layout::primitive_layout(array_data ad) + : m_data(ad) + { + if (m_data.buffers.size() == 0) + { + throw std::runtime_error("No buffers are present in array_data"); + } + } + + template + auto primitive_layout::size() const -> size_type + { + return m_data.buffers[0].size(); + } + + template + auto primitive_layout::element(size_type i) -> reference + { + assert(pos < size()); + return i[data()]; + } + + template + auto primitive_layout::element(size_type i) const -> const_reference + { + assert(pos < size()); + return i[data()]; + } + + template + auto primitive_layout::begin() -> iterator + { + return iterator(data()); + } + + template + auto primitive_layout::end() -> iterator + { + return iterator(data()) + self_type::size(); + } + + template + auto primitive_layout::begin() const -> const_iterator + { + return cbegin(); + } + + template + auto primitive_layout::end() const -> const_iterator + { + return cend(); + } + + template + auto primitive_layout::cbegin() const -> const_iterator + { + return const_iterator(data()); + } + + template + auto primitive_layout::cend() const -> const_iterator + { + return const_iterator(data()) + self_type::size(); + } + + template + auto primitive_layout::data() -> pointer + { + return m_data.buffers[0].data(); + } + + template + auto primitive_layout::data() const -> const_pointer + { + return m_data.buffers[0].data(); + } + +} // namespace sparrow + + diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 76ccdafc..12a16fba 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -39,6 +39,7 @@ set(SPARROW_TESTS test_buffer.cpp test_dynamic_bitset.cpp test_iterator.cpp + test_layout.cpp ) set(test_target "test_sparrow_lib") add_executable(${test_target} ${SPARROW_TESTS}) diff --git a/test/test_layout.cpp b/test/test_layout.cpp new file mode 100644 index 00000000..206cf55a --- /dev/null +++ b/test/test_layout.cpp @@ -0,0 +1,93 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include + +#include "doctest/doctest.h" + +#include "sparrow/layout.hpp" + +namespace sparrow +{ + static_assert(std::ranges::range>); + + using layout_test_type = primitive_layout; + + namespace + { + array_data make_test_array_data() + { + size_t n = 10; + array_data ad; + + ad.type = data_descriptor(data_type::UINT8); + ad.bitmap = dynamic_bitset(n); + buffer b(n); + std::iota(b.begin(), b.end(), 0); + + ad.buffers.push_back(b); + ad.length = n; + ad.offset = 0; + ad.child_data.push_back(array_data()); + return ad; + } + + array_data make_empty_array_data() + { + array_data ad; + ad.type = data_descriptor(data_type::UINT8); + ad.length = 0; + ad.offset = 0; + return ad; + } + } + + TEST_SUITE("layout") + { + TEST_CASE("constructors") + { + array_data ad = make_test_array_data(); + layout_test_type lt(ad); + REQUIRE(lt.size() == ad.length); + + array_data empty_ad = make_empty_array_data(); + REQUIRE_THROWS_AS(layout_test_type empty_lt(empty_ad), typename std::runtime_error); + } + + TEST_CASE("layout_iterator") + { + layout_test_type lt(make_test_array_data()); + layout_test_type::iterator it = lt.begin(); + layout_test_type::const_iterator cit = lt.cbegin(); + REQUIRE(it < lt.end()); + REQUIRE(cit < lt.cend()); + } + + TEST_CASE("iterator") + { + layout_test_type lt(make_test_array_data()); + auto iter = lt.begin(); + auto citer = lt.cbegin(); + for (std::size_t i = 0; i < lt.size(); ++i) + { + CHECK_EQ(*iter++, lt.element(i)); + CHECK_EQ(*citer++, lt.element(i)); + } + CHECK_EQ(iter, lt.end()); + CHECK_EQ(citer, lt.cend()); + } + + } + +} From 39cc1740e3d16881d5364d15ea008446d49de71f Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Fri, 15 Mar 2024 09:49:57 +0100 Subject: [PATCH 2/7] Apply review comments Signed-off-by: Julien Jerphanion Co-authored-by: Johan Mabille Co-authored-by: Klaim --- include/sparrow/layout.hpp | 41 ++++++++++++++++++++++++++++---------- test/test_layout.cpp | 1 + 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/include/sparrow/layout.hpp b/include/sparrow/layout.hpp index c9bc03b7..c9cc121f 100644 --- a/include/sparrow/layout.hpp +++ b/include/sparrow/layout.hpp @@ -24,6 +24,14 @@ namespace sparrow { + /** + * An iterator for primitive_layout. + * + * @tparam T The type of the elements in the layout. + * @tparam is_const A boolean indicating whether the iterator is const. + * + * @note This class is not thread-safe, exception-safe, copyable, movable, equality comparable. + */ template class primitive_layout_iterator : public iterator_base @@ -66,11 +74,21 @@ namespace sparrow // We only use the first buffer and the bitmap. pointer m_pointer = nullptr; - size_type m_index = 0; friend class iterator_access; }; + /** + * A contiguous layout for primitive types. + * + * This class provides a contiguous layout for primitive types, such as `uint8_t`, `int32_t`, etc. + * It iterates over the first buffer in the array_data, and uses the bitmap to skip over null. + * The bitmap is assumed to be present in the array_data. + * + * @tparam T The type of the elements in the layout. + * + * @note This class is not thread-safe, exception-safe, copyable, movable, equality comparable. + */ template class primitive_layout { @@ -118,38 +136,38 @@ namespace sparrow template primitive_layout_iterator::primitive_layout_iterator(pointer pointer) - : m_pointer(pointer), m_index(0) + : m_pointer(pointer) { } template auto primitive_layout_iterator::dereference() const -> reference { - return m_pointer[m_index]; + return *m_pointer; } template void primitive_layout_iterator::increment() { - ++m_index; + ++m_pointer; } template void primitive_layout_iterator::decrement() { - --m_index; + --m_pointer; } template void primitive_layout_iterator::advance(difference_type n) { - m_index += n; + m_pointer += n; } template auto primitive_layout_iterator::distance_to(const self_type& rhs) const -> difference_type { - return rhs.m_index - m_index; + return rhs.m_pointer - m_pointer; } template @@ -172,15 +190,16 @@ namespace sparrow primitive_layout::primitive_layout(array_data ad) : m_data(ad) { - if (m_data.buffers.size() == 0) - { - throw std::runtime_error("No buffers are present in array_data"); - } + // We only require the presence of the bitmap and the first buffer. + assert(m_data.buffers.size() > 0); + assert(m_data.length == m_data.buffers[0].size()); + assert(m_data.length == m_data.bitmap.size()); } template auto primitive_layout::size() const -> size_type { + assert(m_data.buffers.size() > 0); return m_data.buffers[0].size(); } diff --git a/test/test_layout.cpp b/test/test_layout.cpp index 206cf55a..5c86893d 100644 --- a/test/test_layout.cpp +++ b/test/test_layout.cpp @@ -20,6 +20,7 @@ namespace sparrow { + // TODO: Test all the other base types once #15 is addressed. static_assert(std::ranges::range>); using layout_test_type = primitive_layout; From d7883d62798de1a2e747e4ef8647920eed2a53b5 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Fri, 15 Mar 2024 09:49:57 +0100 Subject: [PATCH 3/7] Add assertions Signed-off-by: Julien Jerphanion Co-authored-by: Johan Mabille Co-authored-by: Klaim --- include/sparrow/layout.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/sparrow/layout.hpp b/include/sparrow/layout.hpp index c9cc121f..e7a07c20 100644 --- a/include/sparrow/layout.hpp +++ b/include/sparrow/layout.hpp @@ -256,12 +256,14 @@ namespace sparrow template auto primitive_layout::data() -> pointer { + assert(m_data.buffers.size() > 0); return m_data.buffers[0].data(); } template auto primitive_layout::data() const -> const_pointer { + assert(m_data.buffers.size() > 0); return m_data.buffers[0].data(); } From 99dbb103023c1e8a098e74d951e4d93257abd654 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Fri, 15 Mar 2024 09:58:37 +0100 Subject: [PATCH 4/7] Remove test on empty array_data Signed-off-by: Julien Jerphanion --- test/test_layout.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/test/test_layout.cpp b/test/test_layout.cpp index 5c86893d..1d7ae785 100644 --- a/test/test_layout.cpp +++ b/test/test_layout.cpp @@ -44,14 +44,6 @@ namespace sparrow return ad; } - array_data make_empty_array_data() - { - array_data ad; - ad.type = data_descriptor(data_type::UINT8); - ad.length = 0; - ad.offset = 0; - return ad; - } } TEST_SUITE("layout") @@ -61,9 +53,6 @@ namespace sparrow array_data ad = make_test_array_data(); layout_test_type lt(ad); REQUIRE(lt.size() == ad.length); - - array_data empty_ad = make_empty_array_data(); - REQUIRE_THROWS_AS(layout_test_type empty_lt(empty_ad), typename std::runtime_error); } TEST_CASE("layout_iterator") From 83c8d7f670d4cc7e80cf7c4624a7fcde750dde7e Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Fri, 15 Mar 2024 10:20:54 +0100 Subject: [PATCH 5/7] Add static assertions on concepts Signed-off-by: Julien Jerphanion Co-authored-by: Klaim --- test/test_layout.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_layout.cpp b/test/test_layout.cpp index 1d7ae785..c3c2ec18 100644 --- a/test/test_layout.cpp +++ b/test/test_layout.cpp @@ -22,6 +22,8 @@ namespace sparrow { // TODO: Test all the other base types once #15 is addressed. static_assert(std::ranges::range>); + static_assert(std::contiguous_iterator>); + static_assert(std::contiguous_iterator>); using layout_test_type = primitive_layout; From 47ce4090ff2e33706d4b1d6b7fd43a0c7d92a405 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Fri, 15 Mar 2024 17:33:34 +0100 Subject: [PATCH 6/7] Address review comments Signed-off-by: Julien Jerphanion Co-authored-by: Klaim --- include/sparrow/layout.hpp | 18 ++++++++---------- test/test_layout.cpp | 8 ++++---- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/include/sparrow/layout.hpp b/include/sparrow/layout.hpp index e7a07c20..68af7cca 100644 --- a/include/sparrow/layout.hpp +++ b/include/sparrow/layout.hpp @@ -25,9 +25,9 @@ namespace sparrow { /** - * An iterator for primitive_layout. + * An iterator for `primitive_layout` operating on contiguous data only. * - * @tparam T The type of the elements in the layout. + * @tparam T The type of the elements in the layout's data buffer. * @tparam is_const A boolean indicating whether the iterator is const. * * @note This class is not thread-safe, exception-safe, copyable, movable, equality comparable. @@ -52,8 +52,6 @@ namespace sparrow >; using pointer = typename base_type::pointer; - // Required so that std::ranges::end(p) is - // valid when p is a primitive_layout. primitive_layout_iterator() = default; explicit primitive_layout_iterator(pointer p); @@ -72,7 +70,6 @@ namespace sparrow bool equal(const self_type& rhs) const; bool less_than(const self_type& rhs) const; - // We only use the first buffer and the bitmap. pointer m_pointer = nullptr; friend class iterator_access; @@ -85,7 +82,8 @@ namespace sparrow * It iterates over the first buffer in the array_data, and uses the bitmap to skip over null. * The bitmap is assumed to be present in the array_data. * - * @tparam T The type of the elements in the layout. + * @tparam T The type of the elements in the layout's data buffer. + * A primitive type or a fixed size type. * * @note This class is not thread-safe, exception-safe, copyable, movable, equality comparable. */ @@ -220,13 +218,13 @@ namespace sparrow template auto primitive_layout::begin() -> iterator { - return iterator(data()); + return iterator{data()}; } template auto primitive_layout::end() -> iterator { - return iterator(data()) + self_type::size(); + return iterator{data() + self_type::size()}; } template @@ -244,13 +242,13 @@ namespace sparrow template auto primitive_layout::cbegin() const -> const_iterator { - return const_iterator(data()); + return const_iterator{data()}; } template auto primitive_layout::cend() const -> const_iterator { - return const_iterator(data()) + self_type::size(); + return const_iterator{data() + self_type::size()}; } template diff --git a/test/test_layout.cpp b/test/test_layout.cpp index c3c2ec18..a8ed8406 100644 --- a/test/test_layout.cpp +++ b/test/test_layout.cpp @@ -21,7 +21,7 @@ namespace sparrow { // TODO: Test all the other base types once #15 is addressed. - static_assert(std::ranges::range>); + static_assert(std::ranges::contiguous_range>); static_assert(std::contiguous_iterator>); static_assert(std::contiguous_iterator>); @@ -48,7 +48,7 @@ namespace sparrow } - TEST_SUITE("layout") + TEST_SUITE("primitive_layout") { TEST_CASE("constructors") { @@ -57,7 +57,7 @@ namespace sparrow REQUIRE(lt.size() == ad.length); } - TEST_CASE("layout_iterator") + TEST_CASE("iterator_ordering") { layout_test_type lt(make_test_array_data()); layout_test_type::iterator it = lt.begin(); @@ -66,7 +66,7 @@ namespace sparrow REQUIRE(cit < lt.cend()); } - TEST_CASE("iterator") + TEST_CASE("iterator_equality") { layout_test_type lt(make_test_array_data()); auto iter = lt.begin(); From 7545daaf14779c1bffa669332f32be422874e192 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Fri, 15 Mar 2024 17:50:23 +0100 Subject: [PATCH 7/7] Revert spurious changes Signed-off-by: Julien Jerphanion --- include/sparrow/array_data.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/sparrow/array_data.hpp b/include/sparrow/array_data.hpp index afe7c5ba..d2376e4c 100644 --- a/include/sparrow/array_data.hpp +++ b/include/sparrow/array_data.hpp @@ -88,7 +88,7 @@ namespace sparrow self_type& operator=(U&& value); void swap(self_type& rhs); - + private: void reset(); @@ -181,7 +181,7 @@ namespace sparrow template template U> - auto reference_proxy::operator=(const std::optional& rhs) -> self_type& + auto reference_proxy::operator=(const std::optional& rhs) -> self_type& { update(rhs); return *this; @@ -189,7 +189,7 @@ namespace sparrow template template U> - auto reference_proxy::operator=(std::optional&& rhs) -> self_type& + auto reference_proxy::operator=(std::optional&& rhs) -> self_type& { update(std::move(rhs)); return *this;