From 50b8891457d2c58f4b123d02a9f525ecf2efb798 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 15 Apr 2022 22:46:24 -0700 Subject: [PATCH 01/49] Add strong index type. --- .../cudf/detail/utilities/strong_index.hpp | 117 ++++++++++++++++++ cpp/include/cudf/table/row_operators.cuh | 26 ++-- cpp/src/groupby/sort/group_nunique.cu | 15 ++- cpp/src/transform/one_hot_encode.cu | 5 +- 4 files changed, 147 insertions(+), 16 deletions(-) create mode 100644 cpp/include/cudf/detail/utilities/strong_index.hpp diff --git a/cpp/include/cudf/detail/utilities/strong_index.hpp b/cpp/include/cudf/detail/utilities/strong_index.hpp new file mode 100644 index 00000000000..6655c6a1e5f --- /dev/null +++ b/cpp/include/cudf/detail/utilities/strong_index.hpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +namespace cudf { + +namespace detail { + +enum class index_type_name { + LEFT, + RIGHT, +}; + +template +struct strong_index { + public: + constexpr explicit strong_index(size_type index) : _index(index) {} + + constexpr explicit operator size_type() const { return _index; } + + constexpr size_type value() const { return _index; } + + constexpr strong_index operator+(size_type const& v) const { return strong_index(_index + v); } + constexpr strong_index operator-(size_type const& v) const { return strong_index(_index - v); } + constexpr strong_index operator*(size_type const& v) const { return strong_index(_index * v); } + + constexpr bool operator==(size_type v) const { return _index == v; } + constexpr bool operator!=(size_type v) const { return _index != v; } + constexpr bool operator<=(size_type v) const { return _index <= v; } + constexpr bool operator>=(size_type v) const { return _index >= v; } + constexpr bool operator<(size_type v) const { return _index < v; } + constexpr bool operator>(size_type v) const { return _index > v; } + + constexpr strong_index& operator=(strong_index const& s) + { + _index = s._index; + return *this; + } + constexpr strong_index& operator=(size_type const& i) + { + _index = i; + return *this; + } + constexpr strong_index& operator+=(size_type const& v) + { + _index += v; + return *this; + } + constexpr strong_index& operator-=(size_type const& v) + { + _index -= v; + return *this; + } + constexpr strong_index& operator*=(size_type const& v) + { + _index *= v; + return *this; + } + + constexpr strong_index& operator++() + { + ++_index; + return *this; + } + constexpr strong_index operator++(int) + { + strong_index tmp(*this); + ++_index; + return tmp; + } + constexpr strong_index& operator--() + { + --_index; + return *this; + } + constexpr strong_index operator--(int) + { + strong_index tmp(*this); + --_index; + return tmp; + } + + friend std::ostream& operator<<(std::ostream& os, strong_index 
s) + { + return os << s._index; + } + friend std::istream& operator>>(std::istream& is, strong_index& s) + { + return is >> s._index; + } + + private: + size_type _index; +}; + +} // namespace detail + +using lhs_index_type = detail::strong_index; +using rhs_index_type = detail::strong_index; + +} // namespace cudf diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index 4eca03a800c..5bad736c580 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -198,12 +199,12 @@ class element_equality_comparator { */ template ()>* = nullptr> - __device__ bool operator()(size_type lhs_element_index, - size_type rhs_element_index) const noexcept + __device__ bool operator()(cudf::lhs_index_type lhs_element_index, + cudf::rhs_index_type rhs_element_index) const noexcept { if (nulls) { - bool const lhs_is_null{lhs.is_null(lhs_element_index)}; - bool const rhs_is_null{rhs.is_null(rhs_element_index)}; + bool const lhs_is_null{lhs.is_null(lhs_element_index.value())}; + bool const rhs_is_null{rhs.is_null(rhs_element_index.value())}; if (lhs_is_null and rhs_is_null) { return nulls_are_equal == null_equality::EQUAL; } else if (lhs_is_null != rhs_is_null) { @@ -211,17 +212,24 @@ class element_equality_comparator { } } - return equality_compare(lhs.element(lhs_element_index), - rhs.element(rhs_element_index)); + return equality_compare(lhs.element(lhs_element_index.value()), + rhs.element(rhs_element_index.value())); } template ()>* = nullptr> - __device__ bool operator()(size_type lhs_element_index, size_type rhs_element_index) + __device__ bool operator()(cudf::lhs_index_type lhs_element_index, + cudf::rhs_index_type rhs_element_index) const noexcept { CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types."); } + __device__ bool operator()(cudf::rhs_index_type rhs_element_index, + 
cudf::lhs_index_type lhs_element_index) const noexcept + { + return operator()(lhs_element_index, rhs_element_index); + } + private: column_device_view lhs; column_device_view rhs; @@ -246,8 +254,8 @@ class row_equality_comparator { auto equal_elements = [=](column_device_view l, column_device_view r) { return cudf::type_dispatcher(l.type(), element_equality_comparator{nulls, l, r, nulls_are_equal}, - lhs_row_index, - rhs_row_index); + cudf::lhs_index_type(lhs_row_index), + cudf::rhs_index_type(rhs_row_index)); }; return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements); diff --git a/cpp/src/groupby/sort/group_nunique.cu b/cpp/src/groupby/sort/group_nunique.cu index 478060cbd16..a29e3cf238c 100644 --- a/cpp/src/groupby/sort/group_nunique.cu +++ b/cpp/src/groupby/sort/group_nunique.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -62,9 +63,11 @@ struct nunique_functor { group_labels = group_labels.data()] __device__(auto i) -> size_type { bool is_input_countable = (null_handling == null_policy::INCLUDE || v.is_valid_nocheck(i)); - bool is_unique = is_input_countable && - (group_offsets[group_labels[i]] == i || // first element or - (not equal.operator()(i, i - 1))); // new unique value in sorted + bool is_unique = + is_input_countable && + (group_offsets[group_labels[i]] == i || // first element or + (not equal.operator()(cudf::lhs_index_type(i), + cudf::rhs_index_type(i - 1)))); // new unique value in sorted return static_cast(is_unique); }); @@ -82,8 +85,10 @@ struct nunique_functor { equal, group_offsets = group_offsets.data(), group_labels = group_labels.data()] __device__(auto i) -> size_type { - bool is_unique = group_offsets[group_labels[i]] == i || // first element or - (not equal.operator()(i, i - 1)); // new unique value in sorted + bool is_unique = + group_offsets[group_labels[i]] == i || // first element or + (not equal.operator()(cudf::lhs_index_type(i), + cudf::rhs_index_type(i - 
1))); // new unique value in sorted return static_cast(is_unique); }); thrust::reduce_by_key(rmm::exec_policy(stream), diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index b1a8858f847..a8e49b04305 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -47,8 +48,8 @@ struct one_hot_encode_functor { bool __device__ operator()(size_type i) { - size_type const element_index = i % _input_size; - size_type const category_index = i / _input_size; + cudf::lhs_index_type const element_index(i % _input_size); + cudf::rhs_index_type const category_index(i / _input_size); return _equality_comparator.template operator()(element_index, category_index); } From b9ed4d79525f5812861ac6a5f06c9d23e18753a2 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 20 Apr 2022 12:53:39 -0700 Subject: [PATCH 02/49] Revert changes to non-experimental row operators. 
--- cpp/include/cudf/table/row_operators.cuh | 26 ++++++++---------------- cpp/src/groupby/sort/group_nunique.cu | 15 +++++--------- cpp/src/transform/one_hot_encode.cu | 5 ++--- 3 files changed, 16 insertions(+), 30 deletions(-) diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index 5bad736c580..4eca03a800c 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -199,12 +198,12 @@ class element_equality_comparator { */ template ()>* = nullptr> - __device__ bool operator()(cudf::lhs_index_type lhs_element_index, - cudf::rhs_index_type rhs_element_index) const noexcept + __device__ bool operator()(size_type lhs_element_index, + size_type rhs_element_index) const noexcept { if (nulls) { - bool const lhs_is_null{lhs.is_null(lhs_element_index.value())}; - bool const rhs_is_null{rhs.is_null(rhs_element_index.value())}; + bool const lhs_is_null{lhs.is_null(lhs_element_index)}; + bool const rhs_is_null{rhs.is_null(rhs_element_index)}; if (lhs_is_null and rhs_is_null) { return nulls_are_equal == null_equality::EQUAL; } else if (lhs_is_null != rhs_is_null) { @@ -212,24 +211,17 @@ class element_equality_comparator { } } - return equality_compare(lhs.element(lhs_element_index.value()), - rhs.element(rhs_element_index.value())); + return equality_compare(lhs.element(lhs_element_index), + rhs.element(rhs_element_index)); } template ()>* = nullptr> - __device__ bool operator()(cudf::lhs_index_type lhs_element_index, - cudf::rhs_index_type rhs_element_index) const noexcept + __device__ bool operator()(size_type lhs_element_index, size_type rhs_element_index) { CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types."); } - __device__ bool operator()(cudf::rhs_index_type rhs_element_index, - cudf::lhs_index_type lhs_element_index) const noexcept - { - return operator()(lhs_element_index, 
rhs_element_index); - } - private: column_device_view lhs; column_device_view rhs; @@ -254,8 +246,8 @@ class row_equality_comparator { auto equal_elements = [=](column_device_view l, column_device_view r) { return cudf::type_dispatcher(l.type(), element_equality_comparator{nulls, l, r, nulls_are_equal}, - cudf::lhs_index_type(lhs_row_index), - cudf::rhs_index_type(rhs_row_index)); + lhs_row_index, + rhs_row_index); }; return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements); diff --git a/cpp/src/groupby/sort/group_nunique.cu b/cpp/src/groupby/sort/group_nunique.cu index a29e3cf238c..478060cbd16 100644 --- a/cpp/src/groupby/sort/group_nunique.cu +++ b/cpp/src/groupby/sort/group_nunique.cu @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -63,11 +62,9 @@ struct nunique_functor { group_labels = group_labels.data()] __device__(auto i) -> size_type { bool is_input_countable = (null_handling == null_policy::INCLUDE || v.is_valid_nocheck(i)); - bool is_unique = - is_input_countable && - (group_offsets[group_labels[i]] == i || // first element or - (not equal.operator()(cudf::lhs_index_type(i), - cudf::rhs_index_type(i - 1)))); // new unique value in sorted + bool is_unique = is_input_countable && + (group_offsets[group_labels[i]] == i || // first element or + (not equal.operator()(i, i - 1))); // new unique value in sorted return static_cast(is_unique); }); @@ -85,10 +82,8 @@ struct nunique_functor { equal, group_offsets = group_offsets.data(), group_labels = group_labels.data()] __device__(auto i) -> size_type { - bool is_unique = - group_offsets[group_labels[i]] == i || // first element or - (not equal.operator()(cudf::lhs_index_type(i), - cudf::rhs_index_type(i - 1))); // new unique value in sorted + bool is_unique = group_offsets[group_labels[i]] == i || // first element or + (not equal.operator()(i, i - 1)); // new unique value in sorted return static_cast(is_unique); }); 
thrust::reduce_by_key(rmm::exec_policy(stream), diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index a8e49b04305..b1a8858f847 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -48,8 +47,8 @@ struct one_hot_encode_functor { bool __device__ operator()(size_type i) { - cudf::lhs_index_type const element_index(i % _input_size); - cudf::rhs_index_type const category_index(i / _input_size); + size_type const element_index = i % _input_size; + size_type const category_index = i / _input_size; return _equality_comparator.template operator()(element_index, category_index); } From d67f17ef005c285fcb2a961ab8e8d77a63160c92 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 2 May 2022 17:19:09 -0700 Subject: [PATCH 03/49] Use enum for strongly typed index. --- .../cudf/detail/utilities/strong_index.hpp | 95 +------------------ 1 file changed, 2 insertions(+), 93 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/strong_index.hpp b/cpp/include/cudf/detail/utilities/strong_index.hpp index 6655c6a1e5f..2762364f811 100644 --- a/cpp/include/cudf/detail/utilities/strong_index.hpp +++ b/cpp/include/cudf/detail/utilities/strong_index.hpp @@ -20,98 +20,7 @@ namespace cudf { -namespace detail { - -enum class index_type_name { - LEFT, - RIGHT, -}; - -template -struct strong_index { - public: - constexpr explicit strong_index(size_type index) : _index(index) {} - - constexpr explicit operator size_type() const { return _index; } - - constexpr size_type value() const { return _index; } - - constexpr strong_index operator+(size_type const& v) const { return strong_index(_index + v); } - constexpr strong_index operator-(size_type const& v) const { return strong_index(_index - v); } - constexpr strong_index operator*(size_type const& v) const { return strong_index(_index * v); } - - constexpr bool operator==(size_type 
v) const { return _index == v; } - constexpr bool operator!=(size_type v) const { return _index != v; } - constexpr bool operator<=(size_type v) const { return _index <= v; } - constexpr bool operator>=(size_type v) const { return _index >= v; } - constexpr bool operator<(size_type v) const { return _index < v; } - constexpr bool operator>(size_type v) const { return _index > v; } - - constexpr strong_index& operator=(strong_index const& s) - { - _index = s._index; - return *this; - } - constexpr strong_index& operator=(size_type const& i) - { - _index = i; - return *this; - } - constexpr strong_index& operator+=(size_type const& v) - { - _index += v; - return *this; - } - constexpr strong_index& operator-=(size_type const& v) - { - _index -= v; - return *this; - } - constexpr strong_index& operator*=(size_type const& v) - { - _index *= v; - return *this; - } - - constexpr strong_index& operator++() - { - ++_index; - return *this; - } - constexpr strong_index operator++(int) - { - strong_index tmp(*this); - ++_index; - return tmp; - } - constexpr strong_index& operator--() - { - --_index; - return *this; - } - constexpr strong_index operator--(int) - { - strong_index tmp(*this); - --_index; - return tmp; - } - - friend std::ostream& operator<<(std::ostream& os, strong_index s) - { - return os << s._index; - } - friend std::istream& operator>>(std::istream& is, strong_index& s) - { - return is >> s._index; - } - - private: - size_type _index; -}; - -} // namespace detail - -using lhs_index_type = detail::strong_index; -using rhs_index_type = detail::strong_index; +enum class lhs_index_type : cudf::size_type {}; +enum class rhs_index_type : cudf::size_type {}; } // namespace cudf From 464ed2b0d2a3e9d9c53c508c4444208e769d4832 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 2 May 2022 17:19:34 -0700 Subject: [PATCH 04/49] Add two table comparator and adapter. 
--- .../cudf/table/experimental/row_operators.cuh | 160 +++++++++++++++++- 1 file changed, 159 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 2ed45c71633..eb5be4287e2 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -89,6 +90,7 @@ namespace lexicographic { template class device_row_comparator { friend class self_comparator; + // friend class two_table_device_row_comparator_adapter; /** * @brief Construct a function object for performing a lexicographic @@ -277,7 +279,7 @@ struct preprocessed_table { * @brief Preprocess table for use with lexicographical comparison * * Sets up the table for use with lexicographical comparison. The resulting preprocessed table can - * be passed to the constructor of `lex::self_comparator` to avoid preprocessing again. + * be passed to the constructor of `lexicographic::self_comparator` to avoid preprocessing again. * * @param table The table to preprocess * @param column_order Optional, host array the same length as a row that indicates the desired @@ -427,6 +429,162 @@ class self_comparator { std::shared_ptr d_t; }; +template +class two_table_device_row_comparator_adapter { + friend class two_table_comparator; + + public: + /** + * @brief Checks whether the row at `lhs_index` in the `lhs` table compares + * lexicographically less than the row at `rhs_index` in the `rhs` table. 
+ * + * @param lhs_index The index of row in the `lhs` table to examine + * @param rhs_index The index of the row in the `rhs` table to examine + * @return `true` if row from the `lhs` table compares less than row in the `rhs` table + */ + __device__ bool operator()(lhs_index_type const lhs_index, + rhs_index_type const rhs_index) const noexcept + { + return comp(static_cast(lhs_index), static_cast(rhs_index)); + } + + /** + * @brief Checks whether the row at `rhs_index` in the `rhs` table compares + * lexicographically less than the row at `lhs_index` in the `lhs` table. + * + * @param rhs_index The index of row in the `rhs` table to examine + * @param lhs_index The index of the row in the `lhs` table to examine + * @return `true` if row from the `rhs` table compares less than row in the `lhs` table + */ + __device__ bool operator()(rhs_index_type const rhs_index, + lhs_index_type const lhs_index) const noexcept + { + // TODO: "not lhs < rhs" isn't quite the same as "rhs < lhs". The case of + // equality returns true for operator(rhs, lhs), while operator(lhs, rhs) + // returns false. This would have to be handled at a lower level, if it + // matters. Do we just document that this means "rhs <= lhs"? + return not comp(static_cast(lhs_index), + static_cast(rhs_index)); + } + + private: + /** + * @brief Construct a function object for performing a lexicographic + * comparison between the rows of two tables with strongly typed table index + * types. + * + * @param check_nulls Indicates if either input table contains columns with nulls. + * @param lhs The first table + * @param rhs The second table (may be the same table as `lhs`) + * @param depth Optional, device array the same length as a row that contains starting depths of + * columns if they're nested, and 0 otherwise. + * @param column_order Optional, device array the same length as a row that indicates the desired + * ascending/descending order of each column in a row. 
If `nullopt`, it is assumed all columns are + * sorted in ascending order. + * @param null_precedence Optional, device array the same length as a row and indicates how null + * values compare to all other for every column. If `nullopt`, then null precedence would be + * `null_order::BEFORE` for all columns. + */ + two_table_device_row_comparator_adapter( + Nullate check_nulls, + table_device_view lhs, + table_device_view rhs, + std::optional> depth = std::nullopt, + std::optional> column_order = std::nullopt, + std::optional> null_precedence = std::nullopt) + : comp{check_nulls, lhs, rhs, depth, column_order, null_precedence} + { + } + + device_row_comparator comp; +}; + +/** + * @brief An owning object that can be used to lexicographically compare rows of two different + * tables + * + * This class takes two table_views and preprocesses certain columns to allow for lexicographical + * comparison. The preprocessed table and temporary data required for the comparison are created and + * owned by this class. + * + * Alternatively, `two_table_comparator` can be constructed from two existing + * `shared_ptr`s when sharing the same tables among multiple comparators. + * + * This class can then provide a functor object that can be used on the device. + * The object of this class must outlive the usage of the device functor. + */ +class two_table_comparator { + public: + /** + * @brief Construct an owning object for performing a lexicographic comparison between rows of + * two different tables. + * + * The left and right table are expected to have the same number of columns + * and data types for each column. + * + * @param left The left table to compare + * @param right The right table to compare + * @param column_order Optional, host array the same length as a row that indicates the desired + * ascending/descending order of each column in a row. If empty, it is assumed all columns are + * sorted in ascending order. 
+ * @param null_precedence Optional, host array the same length as a row and indicates how null + * values compare to all other for every column. If empty, then null precedence would be + * `null_order::BEFORE` for all columns. + * @param stream The stream to construct this object on. Not the stream that will be used for + * comparisons using this object. + */ + two_table_comparator(table_view const& left, + table_view const& right, + host_span column_order = {}, + host_span null_precedence = {}, + rmm::cuda_stream_view stream = rmm::cuda_stream_default) + : d_left_table{preprocessed_table::create(left, column_order, null_precedence, stream)}, + d_right_table{preprocessed_table::create(right, column_order, null_precedence, stream)} + { + } + + /** + * @brief Construct an owning object for performing a lexicographic comparison between rows of + * two preprocessed tables. + * + * This constructor allows independently constructing each `preprocessed_table` and sharing them among + * multiple comparators. + * + * @param left A table preprocessed for lexicographic comparison + * @param right A table preprocessed for lexicographic comparison + */ + two_table_comparator(std::shared_ptr left, + std::shared_ptr right) + : d_left_table{std::move(left)}, d_right_table{std::move(right)} + { + } + + /** + * @brief Return the binary operator for comparing rows in the table. + * + * Returns a binary callable, `F`, with signature `bool F(lhs_index_type, rhs_index_type)`. + * + * `F(i,j)` returns true if and only if row `i` of the left table compares + * lexicographically less than row `j` of the right table. + * + * @tparam Nullate A cudf::nullate type describing whether to check for nulls. 
+ */ + template + two_table_device_row_comparator_adapter device_comparator(Nullate nullate = {}) const + { + return two_table_device_row_comparator_adapter(nullate, + *d_left_table, + *d_right_table, + d_left_table->depths(), + d_left_table->column_order(), + d_left_table->null_precedence()); + } + + private: + std::shared_ptr d_left_table; + std::shared_ptr d_right_table; +}; + } // namespace lexicographic namespace hash { From b26b3183638628427c9ce200afb9266da1028985 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 2 May 2022 19:51:05 -0700 Subject: [PATCH 05/49] Add friends. :) --- cpp/include/cudf/table/experimental/row_operators.cuh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index eb5be4287e2..4f4a133e6e0 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -72,6 +72,9 @@ namespace row { namespace lexicographic { +template +class two_table_device_row_comparator_adapter; + /** * @brief Computes whether one row is lexicographically *less* than another row. * @@ -90,7 +93,7 @@ namespace lexicographic { template class device_row_comparator { friend class self_comparator; - // friend class two_table_device_row_comparator_adapter; + friend class two_table_device_row_comparator_adapter; /** * @brief Construct a function object for performing a lexicographic @@ -297,6 +300,7 @@ struct preprocessed_table { private: friend class self_comparator; + friend class two_table_comparator; preprocessed_table(table_device_view_owner&& table, rmm::device_uvector&& column_order, From 1fd199d16bd6a6c26f0eb68ebda145f307d8a7d8 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 2 May 2022 20:52:47 -0700 Subject: [PATCH 06/49] Apply two-table comparator to search algorithms. 
--- cpp/src/search/search.cu | 55 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index 29eddf703df..b907ad8b786 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -19,11 +19,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -75,6 +77,24 @@ void launch_search(DataIterator it_data, } } +struct make_lhs_index { + __device__ lhs_index_type operator()(size_type i) const { return static_cast(i); } +}; + +struct make_rhs_index { + __device__ rhs_index_type operator()(size_type i) const { return static_cast(i); } +}; + +auto make_lhs_index_counting_iterator(size_type start) +{ + return cudf::detail::make_counting_transform_iterator(start, make_lhs_index{}); +}; + +auto make_rhs_index_counting_iterator(size_type start) +{ + return cudf::detail::make_counting_transform_iterator(start, make_rhs_index{}); +}; + std::unique_ptr search_ordered(table_view const& t, table_view const& values, bool find_first, @@ -104,8 +124,40 @@ std::unique_ptr search_ordered(table_view const& t, // This utility will ensure all corresponding dictionary columns have matching keys. // It will return any new dictionary columns created as well as updated table_views. - auto const matched = dictionary::detail::match_dictionaries({t, values}, stream); + auto const matched = dictionary::detail::match_dictionaries({t, values}, stream); + auto const matched_t = matched.second.front(); + auto const matched_values = matched.second.back(); + + auto const& lhs = find_first ? matched_t : matched_values; + auto const& rhs = find_first ? 
matched_values : matched_t; + auto const comparator = cudf::experimental::row::lexicographic::two_table_comparator( + lhs, rhs, column_order, null_precedence, stream); + auto const has_null_elements = has_nested_nulls(lhs) or has_nested_nulls(rhs); + auto const d_comparator = comparator.device_comparator(nullate::DYNAMIC{has_null_elements}); + + auto const left_it = cudf::make_lhs_index_counting_iterator(0); + auto const right_it = cudf::make_rhs_index_counting_iterator(0); + if (find_first) { + launch_search(left_it, + right_it, + t.num_rows(), + values.num_rows(), + result_out, + d_comparator, + find_first, + stream); + } else { + launch_search(right_it, + left_it, + t.num_rows(), + values.num_rows(), + result_out, + d_comparator, + find_first, + stream); + } + /* // Prepare to flatten the structs column auto const has_null_elements = has_nested_nulls(t) or has_nested_nulls(values); auto const flatten_nullability = has_null_elements @@ -137,6 +189,7 @@ std::unique_ptr search_ordered(table_view const& t, null_precedence_dv.data()); launch_search( count_it, count_it, t.num_rows(), values.num_rows(), result_out, comp, find_first, stream); + */ return result; } From 18bd9f021ff7496a68e76ff45f9f01841b81dd61 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 2 May 2022 21:10:21 -0700 Subject: [PATCH 07/49] Move shared lhs/rhs logic into launch_search. 
--- cpp/src/search/search.cu | 83 +++++++++++++++------------------------- 1 file changed, 30 insertions(+), 53 deletions(-) diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index b907ad8b786..b4bd1c91401 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -45,37 +45,6 @@ namespace cudf { namespace { -template -void launch_search(DataIterator it_data, - ValuesIterator it_vals, - size_type data_size, - size_type values_size, - OutputIterator it_output, - Comparator comp, - bool find_first, - rmm::cuda_stream_view stream) -{ - if (find_first) { - thrust::lower_bound(rmm::exec_policy(stream), - it_data, - it_data + data_size, - it_vals, - it_vals + values_size, - it_output, - comp); - } else { - thrust::upper_bound(rmm::exec_policy(stream), - it_data, - it_data + data_size, - it_vals, - it_vals + values_size, - it_output, - comp); - } -} struct make_lhs_index { __device__ lhs_index_type operator()(size_type i) const { return static_cast(i); } @@ -95,6 +64,35 @@ auto make_rhs_index_counting_iterator(size_type start) return cudf::detail::make_counting_transform_iterator(start, make_rhs_index{}); }; +template +void launch_search(size_type search_table_size, + size_type values_size, + OutputIterator it_output, + Comparator comp, + bool find_first, + rmm::cuda_stream_view stream) +{ + auto const it_lhs = cudf::make_lhs_index_counting_iterator(0); + auto const it_rhs = cudf::make_rhs_index_counting_iterator(0); + if (find_first) { + thrust::lower_bound(rmm::exec_policy(stream), + it_lhs, + it_lhs + search_table_size, + it_rhs, + it_rhs + values_size, + it_output, + comp); + } else { + thrust::upper_bound(rmm::exec_policy(stream), + it_rhs, + it_rhs + search_table_size, + it_lhs, + it_lhs + values_size, + it_output, + comp); + } +} + std::unique_ptr search_ordered(table_view const& t, table_view const& values, bool find_first, @@ -135,28 +133,7 @@ std::unique_ptr search_ordered(table_view const& t, auto const has_null_elements = 
has_nested_nulls(lhs) or has_nested_nulls(rhs); auto const d_comparator = comparator.device_comparator(nullate::DYNAMIC{has_null_elements}); - auto const left_it = cudf::make_lhs_index_counting_iterator(0); - auto const right_it = cudf::make_rhs_index_counting_iterator(0); - - if (find_first) { - launch_search(left_it, - right_it, - t.num_rows(), - values.num_rows(), - result_out, - d_comparator, - find_first, - stream); - } else { - launch_search(right_it, - left_it, - t.num_rows(), - values.num_rows(), - result_out, - d_comparator, - find_first, - stream); - } + launch_search(t.num_rows(), values.num_rows(), result_out, d_comparator, find_first, stream); /* // Prepare to flatten the structs column auto const has_null_elements = has_nested_nulls(t) or has_nested_nulls(values); From b5b8b3989203e270c1fe87dc1974714d5e0b64bf Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 2 May 2022 21:25:53 -0700 Subject: [PATCH 08/49] Improve comments, remove old code. --- cpp/src/search/search.cu | 36 +++--------------------------------- 1 file changed, 3 insertions(+), 33 deletions(-) diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index b4bd1c91401..e3034d65368 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -72,6 +72,9 @@ void launch_search(size_type search_table_size, bool find_first, rmm::cuda_stream_view stream) { + // We use lhs and rhs to control the direction of the comparison with + // strongly-typed indices. The first pair of iterators are always the search + // table, and the second pair are the values. 
auto const it_lhs = cudf::make_lhs_index_counting_iterator(0); auto const it_rhs = cudf::make_rhs_index_counting_iterator(0); if (find_first) { @@ -134,39 +137,6 @@ std::unique_ptr search_ordered(table_view const& t, auto const d_comparator = comparator.device_comparator(nullate::DYNAMIC{has_null_elements}); launch_search(t.num_rows(), values.num_rows(), result_out, d_comparator, find_first, stream); - /* - // Prepare to flatten the structs column - auto const has_null_elements = has_nested_nulls(t) or has_nested_nulls(values); - auto const flatten_nullability = has_null_elements - ? structs::detail::column_nullability::FORCE - : structs::detail::column_nullability::MATCH_INCOMING; - - // 0-table_view, 1-column_order, 2-null_precedence, 3-validity_columns - auto const t_flattened = structs::detail::flatten_nested_columns( - matched.second.front(), column_order, null_precedence, flatten_nullability); - auto const values_flattened = - structs::detail::flatten_nested_columns(matched.second.back(), {}, {}, flatten_nullability); - - auto const t_d = table_device_view::create(t_flattened, stream); - auto const values_d = table_device_view::create(values_flattened, stream); - auto const& lhs = find_first ? *t_d : *values_d; - auto const& rhs = find_first ? 
*values_d : *t_d; - - auto const& column_order_flattened = t_flattened.orders(); - auto const& null_precedence_flattened = t_flattened.null_orders(); - auto const column_order_dv = detail::make_device_uvector_async(column_order_flattened, stream); - auto const null_precedence_dv = - detail::make_device_uvector_async(null_precedence_flattened, stream); - - auto const count_it = thrust::make_counting_iterator(0); - auto const comp = row_lexicographic_comparator(nullate::DYNAMIC{has_null_elements}, - lhs, - rhs, - column_order_dv.data(), - null_precedence_dv.data()); - launch_search( - count_it, count_it, t.num_rows(), values.num_rows(), result_out, comp, find_first, stream); - */ return result; } From 73c4b27f7461d80bee1276532f0ed89a438b5866 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 11 May 2022 16:15:11 -0500 Subject: [PATCH 09/49] Move strong typing code into cudf::experimental::row::lexicographic. --- .../cudf/detail/utilities/strong_index.hpp | 26 ------------------- .../cudf/table/experimental/row_operators.cuh | 22 +++++++++++++++- cpp/src/search/search.cu | 23 ++-------------- 3 files changed, 23 insertions(+), 48 deletions(-) delete mode 100644 cpp/include/cudf/detail/utilities/strong_index.hpp diff --git a/cpp/include/cudf/detail/utilities/strong_index.hpp b/cpp/include/cudf/detail/utilities/strong_index.hpp deleted file mode 100644 index 2762364f811..00000000000 --- a/cpp/include/cudf/detail/utilities/strong_index.hpp +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (c) 2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -namespace cudf { - -enum class lhs_index_type : cudf::size_type {}; -enum class rhs_index_type : cudf::size_type {}; - -} // namespace cudf diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 4f4a133e6e0..27370e42169 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -433,6 +432,27 @@ class self_comparator { std::shared_ptr d_t; }; +enum class lhs_index_type : size_type {}; +enum class rhs_index_type : size_type {}; + +struct make_lhs_index { + __device__ lhs_index_type operator()(size_type i) const { return static_cast(i); } +}; + +struct make_rhs_index { + __device__ rhs_index_type operator()(size_type i) const { return static_cast(i); } +}; + +auto inline make_lhs_index_counting_iterator(size_type start) +{ + return cudf::detail::make_counting_transform_iterator(start, make_lhs_index{}); +}; + +auto inline make_rhs_index_counting_iterator(size_type start) +{ + return cudf::detail::make_counting_transform_iterator(start, make_rhs_index{}); +}; + template class two_table_device_row_comparator_adapter { friend class two_table_comparator; diff --git a/cpp/src/search/search.cu b/cpp/src/search/search.cu index 11be9277ff9..4e0d47532c8 100644 --- a/cpp/src/search/search.cu +++ b/cpp/src/search/search.cu @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -46,24 +45,6 @@ namespace cudf { namespace { -struct make_lhs_index { - __device__ lhs_index_type operator()(size_type i) const { return static_cast(i); } -}; - -struct make_rhs_index { - __device__ rhs_index_type operator()(size_type i) const { return static_cast(i); } -}; - -auto 
make_lhs_index_counting_iterator(size_type start) -{ - return cudf::detail::make_counting_transform_iterator(start, make_lhs_index{}); -}; - -auto make_rhs_index_counting_iterator(size_type start) -{ - return cudf::detail::make_counting_transform_iterator(start, make_rhs_index{}); -}; - std::unique_ptr search_ordered(table_view const& haystack, table_view const& needles, bool find_first, @@ -107,8 +88,8 @@ std::unique_ptr search_ordered(table_view const& haystack, // We use lhs and rhs to control the direction of the comparison with // strongly-typed indices. The first pair of iterators are always the // haystack, and the second pair are the needles. - auto const lhs_it = cudf::make_lhs_index_counting_iterator(0); - auto const rhs_it = cudf::make_rhs_index_counting_iterator(0); + auto const lhs_it = cudf::experimental::row::lexicographic::make_lhs_index_counting_iterator(0); + auto const rhs_it = cudf::experimental::row::lexicographic::make_rhs_index_counting_iterator(0); if (find_first) { thrust::lower_bound(rmm::exec_policy(stream), From c8a38fe74398baa213486b382634658421dddea6 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 13 May 2022 12:04:23 -0500 Subject: [PATCH 10/49] Improve comment. 
--- cpp/include/cudf/table/experimental/row_operators.cuh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 6d8473945eb..26156d5eb64 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -529,14 +529,13 @@ class two_table_device_row_comparator_adapter { auto const left_right_ordering = comp(static_cast(lhs_index), static_cast(rhs_index)); - // Invert less/greater values + // Invert less/greater values to reflect right to left ordering if (left_right_ordering == weak_ordering::LESS) { return weak_ordering::GREATER; } else if (left_right_ordering == weak_ordering::GREATER) { return weak_ordering::LESS; - } else { - return weak_ordering::EQUIVALENT; } + return weak_ordering::EQUIVALENT; } private: From 8b5ef34013e4612990be94369319f90b4825a227 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 13 May 2022 12:06:43 -0500 Subject: [PATCH 11/49] Fix docstrings. --- cpp/include/cudf/table/experimental/row_operators.cuh | 6 +++--- cpp/include/cudf/table/row_operators.cuh | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 26156d5eb64..bd55f2f4071 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -239,7 +239,7 @@ class device_row_comparator { * @brief Checks whether the row at `lhs_index` in the `lhs` table compares * lexicographically less, greater, or equivalent to the row at `rhs_index` in the `rhs` table. 
* - * @param lhs_index The index of row in the `lhs` table to examine + * @param lhs_index The index of the row in the `lhs` table to examine * @param rhs_index The index of the row in the `rhs` table to examine * @return weak ordering comparison of the row in the `lhs` table relative to the row in the `rhs` * table @@ -505,7 +505,7 @@ class two_table_device_row_comparator_adapter { * @brief Checks whether the row at `lhs_index` in the `lhs` table compares * lexicographically less than the row at `rhs_index` in the `rhs` table. * - * @param lhs_index The index of row in the `lhs` table to examine + * @param lhs_index The index of the row in the `lhs` table to examine * @param rhs_index The index of the row in the `rhs` table to examine * @return `true` if row from the `lhs` table compares less than row in the `rhs` table */ @@ -519,7 +519,7 @@ class two_table_device_row_comparator_adapter { * @brief Checks whether the row at `rhs_index` in the `rhs` table compares * lexicographically less than the row at `lhs_index` in the `lhs` table. * - * @param rhs_index The index of row in the `rhs` table to examine + * @param rhs_index The index of the row in the `rhs` table to examine * @param lhs_index The index of the row in the `lhs` table to examine * @return `true` if row from the `rhs` table compares less than row in the `lhs` table */ diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index 4d503cd53b8..a181e9bae63 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -389,7 +389,7 @@ class row_lexicographic_comparator { * @brief Checks whether the row at `lhs_index` in the `lhs` table compares * lexicographically less than the row at `rhs_index` in the `rhs` table. 
* - * @param lhs_index The index of row in the `lhs` table to examine + * @param lhs_index The index of the row in the `lhs` table to examine * @param rhs_index The index of the row in the `rhs` table to examine * @return `true` if row from the `lhs` table compares less than row in the * `rhs` table From 77f85b4869a277c741d832fa3fcf443defb6586d Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 13 May 2022 14:22:57 -0500 Subject: [PATCH 12/49] Enable weak ordering machinery (weak_ordering_comparator_impl) to wrap both weakly and strongly typed row comparators. --- .../cudf/table/experimental/row_operators.cuh | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index bd55f2f4071..adc18c80850 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -293,9 +293,10 @@ class device_row_comparator { */ template struct weak_ordering_comparator_impl { - __device__ bool operator()(size_type const lhs, size_type const rhs) const noexcept + template + __device__ bool operator()(Ts&&... args) const noexcept { - weak_ordering const result = comparator(lhs, rhs); + weak_ordering const result = comparator(std::forward(args)...); return ((result == values) || ...); } Comparator comparator; @@ -307,12 +308,11 @@ struct weak_ordering_comparator_impl { * * @tparam Nullate A cudf::nullate type describing whether to check for nulls. */ -template -using less_comparator = - weak_ordering_comparator_impl, weak_ordering::LESS>; +template