From 910b844dcaccc4b4692578d882201cb95271e49e Mon Sep 17 00:00:00 2001 From: Andrew Lumsdaine Date: Mon, 29 Apr 2024 15:11:17 -0700 Subject: [PATCH] Implement permutation view for external sort. (#4920) This pull request implements the `permutation_view` class. A `permutation_view` provides a view over an underlying range that is permuted by indirecting with a given index range. It is required that the permutation range is the same size as the data range and that it contain the values 0, 1, ..., N-1, in arbitrary order, where N is the size of the data range. Example: ```c++ std::vector v{1, 2, 3, 4, 5}; std::vector p{4, 3, 2, 1, 0}; permutation_view view(v, p); CHECK(std::ranges::equal(view, std::vector{5, 4, 3, 2, 1})); ``` The implementation uses `iterator_facade` to realize a C++20 standards conformant iterator and so that `permutation_view` satisfies the appropriate C++20 ranges concepts. The `permutation_view` class has been tested with `var_length_view`. Various range and iterator concepts are also tested. (This PR also cleans up the documentation and the structure of the `var_length_view` class.) --- TYPE: IMPROVEMENT DESC: Implement permutation view for external sort. --- tiledb/common/permutation_view.h | 226 ++++++++++++++++++++ tiledb/common/test/CMakeLists.txt | 2 +- tiledb/common/test/unit_permutation_view.cc | 202 +++++++++++++++++ tiledb/common/var_length_view.h | 66 ++++-- 4 files changed, 476 insertions(+), 20 deletions(-) create mode 100644 tiledb/common/permutation_view.h create mode 100644 tiledb/common/test/unit_permutation_view.cc diff --git a/tiledb/common/permutation_view.h b/tiledb/common/permutation_view.h new file mode 100644 index 00000000000..4cb7fa9cd0a --- /dev/null +++ b/tiledb/common/permutation_view.h @@ -0,0 +1,226 @@ +/** + * @file permutation_view.h + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2024 TileDB, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file contains the definition of the permutation_view class, which + * applies index indirection to a random access range, as specified by the + * indices in another random access range (the permutation). It is required + * that the permutation range is the same size as the data range and that it + * contain the values 0, 1, ..., N-1, in arbitrary order, where N is the size of + * the data range. For a data range `r` and a permutation `p`, and a + * permutation_view `v`, based on `r` and `p`, the expression `v[i]` is + * equivalent to `r[p[i]]`. + */ + +#ifndef TILEDB_PERMUTATION_VIEW_H +#define TILEDB_PERMUTATION_VIEW_H + +#include +#include +#include "iterator_facade.h" + +/** + * A view that creates a permutation of the underlying view, as determined by + * a permutation range. 
+ * + * @tparam R Type of the data range, assumed to be a random access range. + * @tparam P Type of the index range, assumed to be a random access range. + * + * @todo R could be a view rather than a range. + */ +template < + std::ranges::random_access_range R, + std::ranges::random_access_range P> +class permutation_view : public std::ranges::view_base { + /** Forward declaration of the iterator over the range of permuted data. */ + template + struct private_iterator; + + /** The type of the iterator over the data range. */ + using data_iterator_type = std::ranges::iterator_t; + + /** The index type of the permutation view indexing into underlying range. */ + using data_index_type = std::iter_difference_t; + + /** The type of the iterator over the index range -- It should dereference to + * something that can index into the data range (e.g., the data_index_type)*/ + using index_iterator_type = std::ranges::iterator_t; + + /** The value_type dereferenced by the iterator is the value_type R */ + using permuted_value_type = std::ranges::range_value_t; + + /** The reference type dereferenced by the iterator is the reference type of R */ + using permuted_reference = std::ranges::range_reference_t; + + /** The type of the iterator over the permuted data range */ + using permuted_iterator = private_iterator; + + /** The type of the const iterator over the permuted data range */ + using permuted_const_iterator = private_iterator; + + public: + /** Primary constructor */ + permutation_view(R& data, const P& permutation) + : data_begin_(std::ranges::begin(data)) + , data_end_(std::ranges::end(data)) + , index_begin_(std::ranges::begin(permutation)) + , index_end_(std::ranges::end(permutation)) { + // @todo Should this throw an exception instead? 
+ assert( + static_cast(std::ranges::size(data)) == + std::ranges::size(permutation)); + } + + /** Return iterator to the beginning of the permutation view */ + auto begin() { + return permuted_iterator{data_begin_, index_begin_, 0}; + } + + /** Return iterator to the end of the permutation view */ + auto end() { + return permuted_iterator{ + data_begin_, index_begin_, data_end_ - data_begin_}; + } + + /** Return const iterator to the beginning of the permutation view */ + auto begin() const { + return permuted_const_iterator{data_begin_, index_begin_, 0}; + } + + /** Return const iterator to the end of the permutation view */ + auto end() const { + return permuted_const_iterator{ + data_begin_, index_begin_, data_end_ - data_begin_}; + } + + /** Return const iterator to the beginning of the permutation view */ + auto cbegin() const { + return permuted_const_iterator{data_begin_, index_begin_, 0}; + } + + /** Return const iterator to the end of the permutation view */ + auto cend() const { + return permuted_const_iterator{ + data_begin_, index_begin_, data_end_ - data_begin_}; + } + + /** Size of the permutation view */ + auto size() const { + return data_end_ - data_begin_; + } + + /** Accessor */ + auto& operator[](size_t i) const { + // More general? 
return *(data_begin_ + *(index_begin_ + i)); + return data_begin_[index_begin_[i]]; + } + + private: + /** + * Private iterator class for permuted view + * @tparam Value The type of the value that the iterator dereferences to + */ + template + struct private_iterator + : public iterator_facade> { + // using value_type_ = Value; + + public: + /** Default constructor */ + private_iterator() = default; + + /** + * Primary constructor + * @param data_begin The beginning of the data range + * @param index_begin The beginning of the index range + * @param index Offset into the data range + */ + private_iterator( + data_iterator_type data_begin, + index_iterator_type index_begin, + data_index_type index = 0) + : index_(index) + , data_begin_(data_begin) + , index_begin_(index_begin) { + } + + /** Dereference the iterator */ + // value_type_& + // std::ranges::range_reference_t + Reference dereference() const { + // More general? + // return *(data_begin_ + *(index_begin_ + index_)); + return data_begin_[index_begin_[index_]]; + } + + /** Advance the iterator by n */ + auto advance(data_index_type n) { + index_ += n; + return *this; + } + + /** Return the distance to another iterator */ + auto distance_to(const private_iterator& other) const { + return other.index_ - index_; + } + + /** Equality comparison of two iterators */ + bool operator==(const private_iterator& other) const { + return data_begin_ == other.data_begin_ && + index_begin_ == other.index_begin_ && index_ == other.index_; + } + + private: + /** Index to current location of iterator */ + data_index_type index_; + + /** Iterator to the beginning of the data range */ + data_iterator_type data_begin_; + + /** Iterator to the beginning of the index range */ + index_iterator_type index_begin_; + + /** Flag to indicate that the iterator is not a single pass iterator */ + static const bool single_pass_iterator = false; + }; + + /** Iterator to the beginning of the data range */ + std::ranges::iterator_t 
data_begin_; + + /** Iterator to the end of the data range */ + std::ranges::iterator_t data_end_; + + /** const_iterator is c++23. For now we just use an iterator to const */ + /** Iterator to the beginning of the index range */ + std::ranges::iterator_t index_begin_; + + /** Iterator to the end of the index range */ + std::ranges::iterator_t index_end_; +}; + +#endif // TILEDB_PERMUTATION_VIEW_H diff --git a/tiledb/common/test/CMakeLists.txt b/tiledb/common/test/CMakeLists.txt index 92fe7a1e25a..7aee2298f31 100644 --- a/tiledb/common/test/CMakeLists.txt +++ b/tiledb/common/test/CMakeLists.txt @@ -42,7 +42,7 @@ commence(unit_test memory_tracker_types) conclude(unit_test) commence(unit_test common_utils) - this_target_sources(main.cc unit_iterator_facade.cc unit_var_length_view.cc) + this_target_sources(main.cc unit_iterator_facade.cc unit_permutation_view.cc unit_var_length_view.cc) this_target_object_libraries(baseline) conclude(unit_test) diff --git a/tiledb/common/test/unit_permutation_view.cc b/tiledb/common/test/unit_permutation_view.cc new file mode 100644 index 00000000000..bead37c8705 --- /dev/null +++ b/tiledb/common/test/unit_permutation_view.cc @@ -0,0 +1,202 @@ +/** + * @file unit_permutation_view.cc + * + * @section LICENSE + * + * The MIT License + * + * @copyright Copyright (c) 2024 TileDB, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * @section DESCRIPTION + * + * This file implements unit tests for the permutation_view class. + */ + +#include +#include +#include +#include "../permutation_view.h" +#include "../var_length_view.h" + +TEST_CASE("permutation_view: Null test", "[permutation_view][null_test]") { + REQUIRE(true); +} + +// Test that the permutation_view satisfies the expected concepts +TEST_CASE("permutation_view: Range concepts", "[permutation_view][concepts]") { + using test_type = permutation_view, std::vector>; + + CHECK(std::ranges::range); + CHECK(!std::ranges::borrowed_range); + CHECK(std::ranges::sized_range); + CHECK(std::ranges::view); + CHECK(std::ranges::input_range); + CHECK(std::ranges:: + output_range>); + CHECK(std::ranges::forward_range); + CHECK(std::ranges::bidirectional_range); + CHECK(std::ranges::random_access_range); + CHECK(!std::ranges::contiguous_range); + CHECK(std::ranges::common_range); + CHECK(std::ranges::viewable_range); + + CHECK(std::ranges::view); +} + +// Test that the permutation_view iterators satisfy the expected concepts +TEST_CASE( + "permutation_view: Iterator concepts", "[permutation_view][concepts]") { + using test_type = permutation_view, std::vector>; + using test_type_iterator = std::ranges::iterator_t; + using test_type_const_iterator = std::ranges::iterator_t; + + CHECK(std::input_or_output_iterator); + CHECK(std::input_or_output_iterator); + CHECK(std::input_iterator); + CHECK(std::input_iterator); + CHECK(std::output_iterator< + 
test_type_iterator, + std::ranges::range_value_t>); + CHECK(!std::output_iterator< + test_type_const_iterator, + std::ranges::range_value_t>); + CHECK(std::forward_iterator); + CHECK(std::forward_iterator); + CHECK(std::bidirectional_iterator); + CHECK(std::bidirectional_iterator); + CHECK(std::random_access_iterator); + CHECK(std::random_access_iterator); +} + +// Test that the permutation_view value_type satisfies the expected concepts +TEST_CASE( + "permutation_view: value_type concepts", "[permutation_view][concepts]") { + using test_type = permutation_view, std::vector>; + CHECK(std::ranges::range); + + using test_iterator_type = std::ranges::iterator_t; + using test_iterator_value_type = std::iter_value_t; + using test_iterator_reference_type = + std::iter_reference_t; + + using range_value_type = std::ranges::range_value_t; + using range_reference_type = std::ranges::range_reference_t; + + CHECK(std::is_same_v); + CHECK(std::is_same_v); +} + +TEST_CASE("permutation_view: simple constructor", "permutation_view") { + std::vector v{1, 2, 3, 4, 5}; + std::vector p{4, 3, 2, 1, 0}; + permutation_view view(v, p); + CHECK(view.size() == 5); + CHECK(view[0] == 5); + CHECK(view[1] == 4); + CHECK(view[2] == 3); + CHECK(view[3] == 2); + CHECK(view[4] == 1); + + int off = 1; + for (auto&& i : view) { + CHECK(i == 6 - off++); + } + + CHECK(std::ranges::equal(view, std::vector{5, 4, 3, 2, 1})); +} + +TEST_CASE( + "permutation_view: check various iterator properties hold", + "[permutation_view]") { + std::vector v{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + std::vector p{9, 1, 3, 2, 8, 6, 5, 7, 4, 0}; + std::vector expected{10, 2, 4, 3, 9, 7, 6, 8, 5, 1}; + + permutation_view view(v, p); + CHECK(std::equal(view.begin(), view.end(), expected.begin())); + CHECK(std::equal(view.cbegin(), view.cend(), expected.begin())); + CHECK(std::ranges::equal(view, expected)); + + auto it = view.begin(); + CHECK(*it == 10); + CHECK(*(it + 1) == 2); + CHECK(it[2] == 4); + it[3] = 100; + CHECK(it[3] 
== 100); + CHECK(view[3] == 100); + *it++ = 200; + CHECK(view[0] == 200); + CHECK(*it == 2); + + CHECK(it == view.begin() + 1); + CHECK(it > view.begin()); + CHECK(it >= view.begin()); + CHECK(view.begin() < it); + CHECK(view.begin() <= it); + CHECK(it < view.end()); + CHECK(it <= view.end()); + CHECK(view.end() > it); + CHECK(view.end() >= it); + + --it; + CHECK(it == view.begin()); + view[9] = 1000; + CHECK(it[9] == 1000); + CHECK(view.end() - view.begin() == 10); + CHECK(view.end() - it == 10); + ++it; + CHECK(it == view.begin() + 1); + CHECK(view.end() - it == 9); + + auto it2 = it + 5; + CHECK(it2 - it == 5); + CHECK(it2 - 5 == it); + CHECK(it2 - 6 == view.begin()); + CHECK(it2 - view.begin() == 6); + + CHECK(*it2 == 6); + + auto cit = view.cbegin(); + (void)cit; + // Error: Not assignable + // *cit = 1; +} + +TEST_CASE( + "permutation_view: permute var length view", + "[permutation_view][var_length_view]") { + std::vector q = { + 21.0, 20.0, 19.0, 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0}; + std::vector p = {0, 2, 7, 10}; + std::vector o = {2, 0, 1}; + + std::vector> expected{ + {21.0, 20.0}, {19.0, 18.0, 17.0, 16.0, 15.0}, {14.0, 13.0, 12.0}}; + auto w = var_length_view(q, p); + CHECK(std::ranges::equal(*(w.begin()), expected[0])); + CHECK(std::ranges::equal(*(w.begin() + 1), expected[1])); + CHECK(std::ranges::equal(*(w.begin() + 2), expected[2])); + + auto x = permutation_view(w, o); + CHECK(std::ranges::equal(*(x.begin()), expected[2])); + CHECK(std::ranges::equal(*(x.begin() + 1), expected[0])); + CHECK(std::ranges::equal(*(x.begin() + 2), expected[1])); +} diff --git a/tiledb/common/var_length_view.h b/tiledb/common/var_length_view.h index 09feaaed52f..adec0fd8730 100644 --- a/tiledb/common/var_length_view.h +++ b/tiledb/common/var_length_view.h @@ -5,7 +5,7 @@ * * The MIT License * - * @copyright Copyright (c) 2024 TileDB, Inc. + * @copyright Copyright (c) 2018-2021 TileDB, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -69,28 +69,27 @@ class var_length_view : public std::ranges::view_base { template struct private_iterator; - // The type of the iterator over the var length data range, and the type that - // can index into it + /** The type of the iterator over the var length data range */ using data_iterator_type = std::ranges::iterator_t; + + /** The type that can index into the var length data range */ using data_index_type = std::iter_difference_t; - // The type of the iterator over the index range -- It should derference to - // something that can index into the data range (e.g., the data_index_type) + /** The type of the iterator over the index range -- It should dereference to + * something that can index into the data range (e.g., the data_index_type) */ using index_iterator_type = std::ranges::iterator_t; - // They type dereferenced by the iterator is a subrange + /** The type dereferenced by the iterator is a subrange */ using var_length_type = std::ranges::subrange; - using var_length_iterator = private_iterator; - using var_length_const_iterator = private_iterator; - std::ranges::iterator_t data_begin_; - std::ranges::iterator_t data_end_; + /** The type of the iterator over the var length view */ + using var_length_iterator = private_iterator; - // const_iterator is c++23. 
For now we just use an iterator to const - std::ranges::iterator_t index_begin_; - std::ranges::iterator_t index_end_; + /** The type of the const iterator over the var length view */ + using var_length_const_iterator = private_iterator; public: + /** Primary constructor */ var_length_view(R& data, const I& index) : data_begin_(std::ranges::begin(data)) , data_end_(std::ranges::end(data)) @@ -98,33 +97,40 @@ class var_length_view : public std::ranges::view_base { , index_end_(std::ranges::cend(index) - 1) { } + /** Return iterator to the beginning of the var length view */ auto begin() { return var_length_iterator(data_begin_, index_begin_, 0); } + /** Return iterator to the end of the var length view */ auto end() { return var_length_iterator( data_begin_, index_begin_, index_end_ - index_begin_); } + /** Return const iterator to the beginning of the var length view */ auto begin() const { return var_length_const_iterator(data_begin_, index_begin_, 0); } + /** Return const iterator to the end of the var length view */ auto end() const { return var_length_const_iterator( data_begin_, index_begin_, index_end_ - index_begin_); } + /** Return const iterator to the beginning of the var length view */ auto cbegin() const { return var_length_const_iterator(data_begin_, index_begin_, 0); } + /** Return const iterator to the end of the var length view */ auto cend() const { return var_length_const_iterator( data_begin_, index_begin_, index_end_ - index_begin_); } + /** Return the number of subranges in the var length view */ auto size() const { return index_end_ - index_begin_; } @@ -134,11 +140,10 @@ class var_length_view : public std::ranges::view_base { struct private_iterator : public iterator_facade> { using value_type_ = Value; - data_index_type index_; - data_iterator_type data_begin_; - index_iterator_type offsets_begin_; - + /** Default constructor */ private_iterator() = default; + + /** Primary constructor */ private_iterator( data_iterator_type data_begin, 
index_iterator_type offsets_begin, @@ -146,7 +151,6 @@ class var_length_view : public std::ranges::view_base { : index_(index) , data_begin_(data_begin) , offsets_begin_(offsets_begin) { - // ... } /************************************************************************* @@ -168,24 +172,48 @@ class var_length_view : public std::ranges::view_base { data_begin_ + offsets_begin_[index_ + 1]}; } + /** Advance the iterator by n */ auto advance(data_index_type n) { index_ += n; return *this; } + /** Return the distance to another iterator */ auto distance_to(const private_iterator& other) const { return other.index_ - index_; } + /** Compare two iterators for equality */ bool operator==(const private_iterator& other) const { return data_begin_ == other.data_begin_ && offsets_begin_ == other.offsets_begin_ && index_ == other.index_; } + /** Flag to indicate that the iterator is not a single pass iterator */ static const bool single_pass_iterator = false; + + /** The index to the current location of the iterator */ + data_index_type index_; + + /** Iterator to the beginning of the data range */ + data_iterator_type data_begin_; + + /** Iterator to the beginning of the index range */ + index_iterator_type offsets_begin_; }; - // using iterator = private_iterator; + /** The beginning of the data range */ + std::ranges::iterator_t data_begin_; + + /** The end of the data range */ + std::ranges::iterator_t data_end_; + + // const_iterator is c++23. For now we just use an iterator to const + /** The beginning of the index range */ + std::ranges::iterator_t index_begin_; + + /** The end of the index range */ + std::ranges::iterator_t index_end_; }; #endif // TILEDB_VAR_LENGTH_VIEW_H