diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp index e3ef4190fd2..eab858b527a 100644 --- a/cpp/include/cudf/detail/stream_compaction.hpp +++ b/cpp/include/cudf/detail/stream_compaction.hpp @@ -29,9 +29,7 @@ namespace cudf { namespace detail { /** * @copydoc cudf::drop_nulls(table_view const&, std::vector const&, - * cudf::size_type, rmm::device_async_resource_ref) - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * cudf::size_type, rmm::cuda_stream_view, rmm::device_async_resource_ref) */ std::unique_ptr drop_nulls(table_view const& input, std::vector const& keys, @@ -41,9 +39,7 @@ std::unique_ptr
drop_nulls(table_view const& input, /** * @copydoc cudf::drop_nans(table_view const&, std::vector const&, - * cudf::size_type, rmm::device_async_resource_ref) - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * cudf::size_type, rmm::cuda_stream_view, rmm::device_async_resource_ref) */ std::unique_ptr
drop_nans(table_view const& input, std::vector const& keys, @@ -53,8 +49,6 @@ std::unique_ptr
drop_nans(table_view const& input, /** * @copydoc cudf::apply_boolean_mask - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr
apply_boolean_mask(table_view const& input, column_view const& boolean_mask, @@ -63,8 +57,6 @@ std::unique_ptr
apply_boolean_mask(table_view const& input, /** * @copydoc cudf::unique - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr
unique(table_view const& input, std::vector const& keys, @@ -75,8 +67,6 @@ std::unique_ptr
unique(table_view const& input, /** * @copydoc cudf::distinct - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr
distinct(table_view const& input, std::vector const& keys, @@ -110,9 +100,7 @@ rmm::device_uvector distinct_indices(table_view const& input, rmm::device_async_resource_ref mr); /** - * @copydoc cudf::unique_count(column_view const&, null_policy, nan_policy) - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * @copydoc cudf::unique_count(column_view const&, null_policy, nan_policy, rmm::cuda_stream_view) */ cudf::size_type unique_count(column_view const& input, null_policy null_handling, @@ -120,18 +108,14 @@ cudf::size_type unique_count(column_view const& input, rmm::cuda_stream_view stream); /** - * @copydoc cudf::unique_count(table_view const&, null_equality) - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * @copydoc cudf::unique_count(table_view const&, null_equality, rmm::cuda_stream_view) */ cudf::size_type unique_count(table_view const& input, null_equality nulls_equal, rmm::cuda_stream_view stream); /** - * @copydoc cudf::distinct_count(column_view const&, null_policy, nan_policy) - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * @copydoc cudf::distinct_count(column_view const&, null_policy, nan_policy, rmm::cuda_stream_view) */ cudf::size_type distinct_count(column_view const& input, null_policy null_handling, @@ -139,9 +123,7 @@ cudf::size_type distinct_count(column_view const& input, rmm::cuda_stream_view stream); /** - * @copydoc cudf::distinct_count(table_view const&, null_equality) - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
+ * @copydoc cudf::distinct_count(table_view const&, null_equality, rmm::cuda_stream_view) */ cudf::size_type distinct_count(table_view const& input, null_equality nulls_equal, diff --git a/cpp/include/cudf/lists/detail/stream_compaction.hpp b/cpp/include/cudf/lists/detail/stream_compaction.hpp index f5e5b29bc8f..3c3d4981ef9 100644 --- a/cpp/include/cudf/lists/detail/stream_compaction.hpp +++ b/cpp/include/cudf/lists/detail/stream_compaction.hpp @@ -24,10 +24,7 @@ namespace cudf::lists::detail { /** - * @copydoc cudf::lists::apply_boolean_mask(lists_column_view const&, lists_column_view const&, - * rmm::device_async_resource_ref) - * - * @param stream CUDA stream used for device memory operations and kernel launches + * @copydoc cudf::lists::apply_boolean_mask */ std::unique_ptr apply_boolean_mask(lists_column_view const& input, lists_column_view const& boolean_mask, @@ -35,9 +32,7 @@ std::unique_ptr apply_boolean_mask(lists_column_view const& input, rmm::device_async_resource_ref mr); /** - * @copydoc cudf::list::distinct - * - * @param stream CUDA stream used for device memory operations and kernel launches. + * @copydoc cudf::lists::distinct */ std::unique_ptr distinct(lists_column_view const& input, null_equality nulls_equal, diff --git a/cpp/include/cudf/stream_compaction.hpp b/cpp/include/cudf/stream_compaction.hpp index 181af11adb8..863da217282 100644 --- a/cpp/include/cudf/stream_compaction.hpp +++ b/cpp/include/cudf/stream_compaction.hpp @@ -66,6 +66,7 @@ namespace cudf { * @param[in] keys vector of indices representing key columns from `input` * @param[in] keep_threshold The minimum number of non-null fields in a row * required to keep the row. + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device memory * @return Table containing all rows of the `input` with at least @p * keep_threshold non-null fields in @p keys. 
@@ -74,6 +75,7 @@ std::unique_ptr
drop_nulls( table_view const& input, std::vector const& keys, cudf::size_type keep_threshold, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -98,6 +100,7 @@ std::unique_ptr
drop_nulls( * * @param[in] input The input `table_view` to filter * @param[in] keys vector of indices representing key columns from `input` + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device memory * @return Table containing all rows of the `input` without nulls in the columns * of @p keys. @@ -105,6 +108,7 @@ std::unique_ptr
drop_nulls( std::unique_ptr
drop_nulls( table_view const& input, std::vector const& keys, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -140,6 +144,7 @@ std::unique_ptr
drop_nulls( * @param[in] keys vector of indices representing key columns from `input` * @param[in] keep_threshold The minimum number of non-NAN elements in a row * required to keep the row. + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device memory * @return Table containing all rows of the `input` with at least @p * keep_threshold non-NAN elements in @p keys. @@ -148,6 +153,7 @@ std::unique_ptr
drop_nans( table_view const& input, std::vector const& keys, cudf::size_type keep_threshold, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -173,6 +179,7 @@ std::unique_ptr
drop_nans( * * @param[in] input The input `table_view` to filter * @param[in] keys vector of indices representing key columns from `input` + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device memory * @return Table containing all rows of the `input` without NANs in the columns * of @p keys. @@ -180,6 +187,7 @@ std::unique_ptr
drop_nans( std::unique_ptr
drop_nans( table_view const& input, std::vector const& keys, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -199,6 +207,7 @@ std::unique_ptr
drop_nans( * @param[in] input The input table_view to filter * @param[in] boolean_mask A nullable column_view of type type_id::BOOL8 used * as a mask to filter the `input`. + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device memory * @return Table containing copy of all rows of @p input passing * the filter defined by @p boolean_mask. @@ -206,6 +215,7 @@ std::unique_ptr
drop_nans( std::unique_ptr
apply_boolean_mask( table_view const& input, column_view const& boolean_mask, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -240,6 +250,7 @@ enum class duplicate_keep_option { * @param[in] keep keep any, first, last, or none of the found duplicates * @param[in] nulls_equal flag to denote nulls are equal if null_equality::EQUAL, nulls are not * equal if null_equality::UNEQUAL + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device * memory * @@ -250,6 +261,7 @@ std::unique_ptr
unique( std::vector const& keys, duplicate_keep_option keep, null_equality nulls_equal = null_equality::EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -268,6 +280,7 @@ std::unique_ptr
unique( * @param keep Copy any, first, last, or none of the found duplicates * @param nulls_equal Flag to specify whether null elements should be considered as equal * @param nans_equal Flag to specify whether NaN elements should be considered as equal + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table * @return Table with distinct rows in an unspecified order */ @@ -277,6 +290,7 @@ std::unique_ptr
distinct( duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::ALL_EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @@ -345,12 +359,14 @@ std::unique_ptr
stable_distinct( * @param[in] input The column_view whose consecutive groups of equivalent rows will be counted * @param[in] null_handling flag to include or ignore `null` while counting * @param[in] nan_handling flag to consider `NaN==null` or not + * @param[in] stream CUDA stream used for device memory operations and kernel launches * * @return number of consecutive groups of equivalent rows in the column */ cudf::size_type unique_count(column_view const& input, null_policy null_handling, - nan_policy nan_handling); + nan_policy nan_handling, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Count the number of consecutive groups of equivalent rows in a table. @@ -358,11 +374,13 @@ cudf::size_type unique_count(column_view const& input, * @param[in] input Table whose consecutive groups of equivalent rows will be counted * @param[in] nulls_equal flag to denote if null elements should be considered equal * nulls are not equal if null_equality::UNEQUAL. + * @param[in] stream CUDA stream used for device memory operations and kernel launches * * @return number of consecutive groups of equivalent rows in the column */ cudf::size_type unique_count(table_view const& input, - null_equality nulls_equal = null_equality::EQUAL); + null_equality nulls_equal = null_equality::EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Count the distinct elements in the column_view. 
@@ -381,12 +399,14 @@ cudf::size_type unique_count(table_view const& input, * @param[in] input The column_view whose distinct elements will be counted * @param[in] null_handling flag to include or ignore `null` while counting * @param[in] nan_handling flag to consider `NaN==null` or not + * @param[in] stream CUDA stream used for device memory operations and kernel launches * * @return number of distinct rows in the table */ cudf::size_type distinct_count(column_view const& input, null_policy null_handling, - nan_policy nan_handling); + nan_policy nan_handling, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Count the distinct rows in a table. @@ -394,11 +414,13 @@ cudf::size_type distinct_count(column_view const& input, * @param[in] input Table whose distinct rows will be counted * @param[in] nulls_equal flag to denote if null elements should be considered equal. * nulls are not equal if null_equality::UNEQUAL. + * @param[in] stream CUDA stream used for device memory operations and kernel launches * * @return number of distinct rows in the table */ cudf::size_type distinct_count(table_view const& input, - null_equality nulls_equal = null_equality::EQUAL); + null_equality nulls_equal = null_equality::EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** @} */ } // namespace cudf diff --git a/cpp/src/stream_compaction/apply_boolean_mask.cu b/cpp/src/stream_compaction/apply_boolean_mask.cu index cdca9517d94..9812f4ffbd7 100644 --- a/cpp/src/stream_compaction/apply_boolean_mask.cu +++ b/cpp/src/stream_compaction/apply_boolean_mask.cu @@ -91,9 +91,10 @@ std::unique_ptr
apply_boolean_mask(table_view const& input, */ std::unique_ptr
apply_boolean_mask(table_view const& input, column_view const& boolean_mask, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::apply_boolean_mask(input, boolean_mask, cudf::get_default_stream(), mr); + return detail::apply_boolean_mask(input, boolean_mask, stream, mr); } } // namespace cudf diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index e5cf29f3ebf..0d2a81c7a5c 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -149,11 +149,11 @@ std::unique_ptr
distinct(table_view const& input, duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::distinct( - input, keys, keep, nulls_equal, nans_equal, cudf::get_default_stream(), mr); + return detail::distinct(input, keys, keep, nulls_equal, nans_equal, stream, mr); } std::unique_ptr distinct_indices(table_view const& input, diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu index 9843bb889f4..81a045a064c 100644 --- a/cpp/src/stream_compaction/distinct_count.cu +++ b/cpp/src/stream_compaction/distinct_count.cu @@ -217,15 +217,18 @@ cudf::size_type distinct_count(column_view const& input, cudf::size_type distinct_count(column_view const& input, null_policy null_handling, - nan_policy nan_handling) + nan_policy nan_handling, + rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); - return detail::distinct_count(input, null_handling, nan_handling, cudf::get_default_stream()); + return detail::distinct_count(input, null_handling, nan_handling, stream); } -cudf::size_type distinct_count(table_view const& input, null_equality nulls_equal) +cudf::size_type distinct_count(table_view const& input, + null_equality nulls_equal, + rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); - return detail::distinct_count(input, nulls_equal, cudf::get_default_stream()); + return detail::distinct_count(input, nulls_equal, stream); } } // namespace cudf diff --git a/cpp/src/stream_compaction/drop_nans.cu b/cpp/src/stream_compaction/drop_nans.cu index b46381c8ff6..b98ebbc2ecc 100644 --- a/cpp/src/stream_compaction/drop_nans.cu +++ b/cpp/src/stream_compaction/drop_nans.cu @@ -117,20 +117,22 @@ std::unique_ptr
drop_nans(table_view const& input, std::unique_ptr
drop_nans(table_view const& input, std::vector const& keys, cudf::size_type keep_threshold, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::drop_nans(input, keys, keep_threshold, cudf::get_default_stream(), mr); + return detail::drop_nans(input, keys, keep_threshold, stream, mr); } /* * Filters a table to remove nan elements. */ std::unique_ptr
drop_nans(table_view const& input, std::vector const& keys, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::drop_nans(input, keys, keys.size(), cudf::get_default_stream(), mr); + return detail::drop_nans(input, keys, keys.size(), stream, mr); } } // namespace cudf diff --git a/cpp/src/stream_compaction/drop_nulls.cu b/cpp/src/stream_compaction/drop_nulls.cu index cb7cd61bf02..2497e4e5065 100644 --- a/cpp/src/stream_compaction/drop_nulls.cu +++ b/cpp/src/stream_compaction/drop_nulls.cu @@ -90,20 +90,22 @@ std::unique_ptr
drop_nulls(table_view const& input, std::unique_ptr
drop_nulls(table_view const& input, std::vector const& keys, cudf::size_type keep_threshold, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::drop_nulls(input, keys, keep_threshold, cudf::get_default_stream(), mr); + return detail::drop_nulls(input, keys, keep_threshold, stream, mr); } /* * Filters a table to remove null elements. */ std::unique_ptr
drop_nulls(table_view const& input, std::vector const& keys, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::drop_nulls(input, keys, keys.size(), cudf::get_default_stream(), mr); + return detail::drop_nulls(input, keys, keys.size(), stream, mr); } } // namespace cudf diff --git a/cpp/src/stream_compaction/unique.cu b/cpp/src/stream_compaction/unique.cu index edb47984d13..93de0e60b6d 100644 --- a/cpp/src/stream_compaction/unique.cu +++ b/cpp/src/stream_compaction/unique.cu @@ -119,10 +119,11 @@ std::unique_ptr
unique(table_view const& input, std::vector const& keys, duplicate_keep_option const keep, null_equality nulls_equal, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::unique(input, keys, keep, nulls_equal, cudf::get_default_stream(), mr); + return detail::unique(input, keys, keep, nulls_equal, stream, mr); } } // namespace cudf diff --git a/cpp/tests/streams/stream_compaction_test.cpp b/cpp/tests/streams/stream_compaction_test.cpp index 56443870602..7c3df514699 100644 --- a/cpp/tests/streams/stream_compaction_test.cpp +++ b/cpp/tests/streams/stream_compaction_test.cpp @@ -41,6 +41,7 @@ auto constexpr NULL_UNEQUAL = cudf::null_equality::UNEQUAL; auto constexpr NAN_EQUAL = cudf::nan_equality::ALL_EQUAL; auto constexpr NAN_UNEQUAL = cudf::nan_equality::UNEQUAL; +using int16s_col = cudf::test::fixed_width_column_wrapper; using int32s_col = cudf::test::fixed_width_column_wrapper; using floats_col = cudf::test::fixed_width_column_wrapper; @@ -51,11 +52,9 @@ using cudf::test::iterators::no_nulls; using cudf::test::iterators::null_at; using cudf::test::iterators::nulls_at; -struct StableDistinctKeepAny : public cudf::test::BaseFixture {}; +struct StreamCompactionTest : public cudf::test::BaseFixture {}; -struct StableDistinctKeepFirstLastNone : public cudf::test::BaseFixture {}; - -TEST_F(StableDistinctKeepAny, NoNullsTableWithNaNs) +TEST_F(StreamCompactionTest, StableDistinctKeepAnyNoNullsTableWithNaNs) { // Column(s) used to test KEEP_ANY needs to have same rows in contiguous // groups for equivalent keys because KEEP_ANY is nondeterministic. 
@@ -94,7 +93,7 @@ TEST_F(StableDistinctKeepAny, NoNullsTableWithNaNs)
   }
 }
 
-TEST_F(StableDistinctKeepAny, InputWithNullsAndNaNs)
+TEST_F(StreamCompactionTest, StableDistinctKeepAnyInputWithNullsAndNaNs)
 {
   auto constexpr null{0.0};  // shadow the global `null` variable of type int
 
@@ -150,7 +149,7 @@ TEST_F(StableDistinctKeepAny, InputWithNullsAndNaNs)
   }
 }
 
-TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsEqual)
+TEST_F(StreamCompactionTest, StableDistinctKeepFirstLastNoneInputWithNaNsEqual)
 {
   // Column(s) used to test needs to have different rows for the same keys.
   auto const col     = int32s_col{0, 1, 2, 3, 4, 5, 6};
@@ -192,7 +191,7 @@ TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsEqual)
   }
 }
 
-TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsUnequal)
+TEST_F(StreamCompactionTest, StableDistinctKeepFirstLastNoneInputWithNaNsUnequal)
 {
   // Column(s) used to test needs to have different rows for the same keys.
   auto const col     = int32s_col{0, 1, 2, 3, 4, 5, 6, 7};
@@ -233,3 +232,284 @@ TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsUnequal)
     CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
   }
 }
+
+// Exercises both cudf::drop_nans overloads with an explicitly supplied (non-default) stream.
+TEST_F(StreamCompactionTest, DropNaNs)
+{
+  auto const col1 = floats_col{{1., 2., NaN, NaN, 5., 6.}, nulls_at({2, 5})};
+  auto const col2 = int32s_col{{10, 40, 70, 5, 2, 10}, nulls_at({2, 5})};
+  auto const col3 = floats_col{{NaN, 40., 70., NaN, 2., 10.}, nulls_at({2, 5})};
+  cudf::table_view input{{col1, col2, col3}};
+
+  std::vector<cudf::size_type> keys{0, 2};
+
+  {
+    // With keep_threshold: only row 3 (NaN in both key columns) is dropped
+    auto const col1_expected = floats_col{{1., 2., 3., 5., 6.}, nulls_at({2, 4})};
+    auto const col2_expected = int32s_col{{10, 40, 70, 2, 10}, nulls_at({2, 4})};
+    auto const col3_expected = floats_col{{NaN, 40., 70., 2., 10.}, nulls_at({2, 4})};
+    cudf::table_view expected{{col1_expected, col2_expected, col3_expected}};
+
+    auto result = cudf::drop_nans(input, keys, keys.size() - 1, cudf::test::get_default_stream());
+
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+
+  {
+    // Without keep_threshold: every row with a NaN in any key column is dropped
+    auto const col1_expected = floats_col{{2., 3., 5., 6.}, nulls_at({1, 3})};
+    auto const col2_expected = int32s_col{{40, 70, 2, 10}, nulls_at({1, 3})};
+    auto const col3_expected = floats_col{{40., 70., 2., 10.}, nulls_at({1, 3})};
+    cudf::table_view expected{{col1_expected, col2_expected, col3_expected}};
+
+    auto result = cudf::drop_nans(input, keys, cudf::test::get_default_stream());
+
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+}
+
+// Exercises both cudf::drop_nulls overloads with an explicitly supplied (non-default) stream.
+TEST_F(StreamCompactionTest, DropNulls)
+{
+  auto const col1 = int16s_col{{1, 0, 1, 0, 1, 0}, nulls_at({2, 5})};
+  auto const col2 = int32s_col{{10, 40, 70, 5, 2, 10}, nulls_at({2})};
+  auto const col3 = floats_col{{10., 40., 70., 5., 2., 10.}, no_nulls()};
+  cudf::table_view input{{col1, col2, col3}};
+  std::vector<cudf::size_type> keys{0, 1, 2};
+
+  {
+    // With keep_threshold: only row 2 (a single non-null key) is dropped
+    auto const col1_expected = int16s_col{{1, 0, 0, 1, 0}, null_at(4)};
+    auto const col2_expected = int32s_col{{10, 40, 5, 2, 10}, no_nulls()};
+    auto const col3_expected = floats_col{{10., 40., 5., 2., 10.}, no_nulls()};
+    cudf::table_view expected{{col1_expected, col2_expected, col3_expected}};
+
+    auto result = cudf::drop_nulls(input, keys, keys.size() - 1, cudf::test::get_default_stream());
+
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+
+  {
+    // Without keep_threshold: every row with a null in any key column is dropped
+    auto const col1_expected = int16s_col{{1, 0, 0, 1}, no_nulls()};
+    auto const col2_expected = int32s_col{{10, 40, 5, 2}, no_nulls()};
+    auto const col3_expected = floats_col{{10., 40., 5., 2.}, no_nulls()};
+    cudf::table_view expected{{col1_expected, col2_expected, col3_expected}};
+
+    auto result = cudf::drop_nulls(input, keys, cudf::test::get_default_stream());
+
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+  }
+}
+
+// Exercises cudf::unique for KEEP_FIRST / KEEP_LAST / KEEP_NONE with an explicitly supplied stream.
+TEST_F(StreamCompactionTest, UniqueKeepFirstLastNone)
+{
+  auto const col1     = int32s_col{5, 4, 3, 5, 8, 5};
+  auto const col2     = floats_col{4., 5., 3., 4., 9., 4.};
+  auto const col1_key = int32s_col{20, 20, 20, 19, 21, 9};
+  auto const col2_key = int32s_col{19, 19, 20, 20, 9, 21};
+
+  cudf::table_view input{{col1, col2, col1_key, col2_key}};
+  std::vector<cudf::size_type> keys = {2, 3};
+
+  {
+    // KEEP_FIRST
+    auto const exp_col1_first     = int32s_col{5, 3, 5, 8, 5};
+    auto const exp_col2_first     = floats_col{4., 3., 4., 9., 4.};
+    auto const exp_col1_key_first = int32s_col{20, 20, 19, 21, 9};
+    auto const exp_col2_key_first = int32s_col{19, 20, 20, 9, 21};
+    cudf::table_view expected_first{
+      {exp_col1_first, exp_col2_first, exp_col1_key_first, exp_col2_key_first}};
+
+    auto const result = cudf::unique(input,
+                                     keys,
+                                     cudf::duplicate_keep_option::KEEP_FIRST,
+                                     cudf::null_equality::EQUAL,
+                                     cudf::test::get_default_stream());
+
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected_first, *result);
+  }
+
+  {
+    // KEEP_LAST
+    auto const exp_col1_last     = int32s_col{4, 3, 5, 8, 5};
+    auto const exp_col2_last     = floats_col{5., 3., 4., 9., 4.};
+    auto const exp_col1_key_last = int32s_col{20, 20, 19, 21, 9};
+    auto const exp_col2_key_last = int32s_col{19, 20, 20, 9, 21};
+    cudf::table_view expected_last{
+      {exp_col1_last, exp_col2_last, exp_col1_key_last, exp_col2_key_last}};
+
+    auto const result = cudf::unique(input,
+                                     keys,
+                                     cudf::duplicate_keep_option::KEEP_LAST,
+                                     cudf::null_equality::EQUAL,
+                                     cudf::test::get_default_stream());
+
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected_last, *result);
+  }
+
+  {
+    // KEEP_NONE
+    auto const exp_col1_unique     = int32s_col{3, 5, 8, 5};
+    auto const exp_col2_unique     = floats_col{3., 4., 9., 4.};
+    auto const exp_col1_key_unique = int32s_col{20, 19, 21, 9};
+    auto const exp_col2_key_unique = int32s_col{20, 20, 9, 21};
+    cudf::table_view expected_unique{
+      {exp_col1_unique, exp_col2_unique, exp_col1_key_unique, exp_col2_key_unique}};
+
+    auto const result = cudf::unique(input,
+                                     keys,
+                                     cudf::duplicate_keep_option::KEEP_NONE,
+                                     cudf::null_equality::EQUAL,
+                                     cudf::test::get_default_stream());
+
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected_unique, *result);
+  }
+}
+
+// Exercises cudf::distinct on the test stream; the order-normalizing sort uses that stream too.
+TEST_F(StreamCompactionTest, DistinctKeepFirstLastNone)
+{
+  // Column(s) used to test needs to have different rows for the same keys.
+  auto const col1  = int32s_col{0, 1, 2, 3, 4, 5, 6};
+  auto const col2  = floats_col{10, 11, 12, 13, 14, 15, 16};
+  auto const keys1 = int32s_col{20, 20, 20, 20, 19, 21, 9};
+  auto const keys2 = int32s_col{19, 19, 19, 20, 20, 9, 21};
+
+  auto const input   = cudf::table_view{{col1, col2, keys1, keys2}};
+  auto const key_idx = std::vector<cudf::size_type>{2, 3};
+
+  // KEEP_FIRST
+  {
+    auto const exp_col1_sort  = int32s_col{6, 4, 0, 3, 5};
+    auto const exp_col2_sort  = floats_col{16, 14, 10, 13, 15};
+    auto const exp_keys1_sort = int32s_col{9, 19, 20, 20, 21};
+    auto const exp_keys2_sort = int32s_col{21, 20, 19, 20, 9};
+    auto const expected_sort =
+      cudf::table_view{{exp_col1_sort, exp_col2_sort, exp_keys1_sort, exp_keys2_sort}};
+
+    auto const result      = cudf::distinct(input,
+                                       key_idx,
+                                       cudf::duplicate_keep_option::KEEP_FIRST,
+                                       cudf::null_equality::EQUAL,
+                                       cudf::nan_equality::ALL_EQUAL,
+                                       cudf::test::get_default_stream());
+    auto const result_sort = cudf::sort_by_key(
+      *result, result->select(key_idx), {}, {}, cudf::test::get_default_stream());
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort, *result_sort);
+  }
+
+  // KEEP_LAST
+  {
+    auto const exp_col1_sort  = int32s_col{6, 4, 2, 3, 5};
+    auto const exp_col2_sort  = floats_col{16, 14, 12, 13, 15};
+    auto const exp_keys1_sort = int32s_col{9, 19, 20, 20, 21};
+    auto const exp_keys2_sort = int32s_col{21, 20, 19, 20, 9};
+    auto const expected_sort =
+      cudf::table_view{{exp_col1_sort, exp_col2_sort, exp_keys1_sort, exp_keys2_sort}};
+
+    auto const result      = cudf::distinct(input,
+                                       key_idx,
+                                       cudf::duplicate_keep_option::KEEP_LAST,
+                                       cudf::null_equality::EQUAL,
+                                       cudf::nan_equality::ALL_EQUAL,
+                                       cudf::test::get_default_stream());
+    auto const result_sort = cudf::sort_by_key(
+      *result, result->select(key_idx), {}, {}, cudf::test::get_default_stream());
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort, *result_sort);
+  }
+
+  // KEEP_NONE
+  {
+    auto const exp_col1_sort  = int32s_col{6, 4, 3, 5};
+    auto const exp_col2_sort  = floats_col{16, 14, 13, 15};
+    auto const exp_keys1_sort = int32s_col{9, 19, 20, 21};
+    auto const exp_keys2_sort = int32s_col{21, 20, 20, 9};
+    auto const expected_sort =
+      cudf::table_view{{exp_col1_sort, exp_col2_sort, exp_keys1_sort, exp_keys2_sort}};
+
+    auto const result      = cudf::distinct(input,
+                                       key_idx,
+                                       cudf::duplicate_keep_option::KEEP_NONE,
+                                       cudf::null_equality::EQUAL,
+                                       cudf::nan_equality::ALL_EQUAL,
+                                       cudf::test::get_default_stream());
+    auto const result_sort = cudf::sort_by_key(
+      *result, result->select(key_idx), {}, {}, cudf::test::get_default_stream());
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort, *result_sort);
+  }
+}
+
+// Exercises cudf::apply_boolean_mask with an explicitly supplied (non-default) stream.
+TEST_F(StreamCompactionTest, ApplyBooleanMask)
+{
+  auto const col = int32s_col{
+    9668, 9590, 9526, 9205, 9434, 9347, 9160, 9569, 9143, 9807, 9606, 9446, 9279, 9822, 9691};
+  cudf::test::fixed_width_column_wrapper<bool> mask({false, false, true, false, false,
+                                                     true, false, true, false, true,
+                                                     false, false, true, false, true});
+  cudf::table_view input({col});
+  auto const col_expected = int32s_col{9526, 9347, 9569, 9807, 9279, 9691};
+  cudf::table_view expected({col_expected});
+
+  auto const result = cudf::apply_boolean_mask(input, mask, cudf::test::get_default_stream());
+
+  CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
+}
+
+// Exercises the column overload of cudf::distinct_count with an explicitly supplied stream.
+TEST_F(StreamCompactionTest, DistinctCountColumn)
+{
+  auto const col = int32s_col{1, 3, 3, 4, 31, 1, 8, 2, 0, 4, 1, 4, 10, 40, 31, 42, 0, 42, 8, 5, 4};
+
+  // Distinct values: 0, 1, 2, 3, 4, 5, 8, 10, 31, 40, 42
+  EXPECT_EQ(11,
+            cudf::distinct_count(col,
+                                 cudf::null_policy::INCLUDE,
+                                 cudf::nan_policy::NAN_IS_VALID,
+                                 cudf::test::get_default_stream()));
+}
+
+// Exercises the table overload of cudf::distinct_count with an explicitly supplied stream.
+TEST_F(StreamCompactionTest, DistinctCountTable)
+{
+  auto const col1 = int32s_col{
+    1, 3, 3, 3, 4, 31, 1, 8, 2, 0, 4, 1, 4, 10, 40, 31, 42, 0, 42, 8, 5, 4};
+  auto const col2 = int32s_col{
+    3, 3, 3, 4, 31, 1, 8, 5, 0, 4, 1, 4, 10, 40, 31, 42, 0, 42, 8, 5, 4, 1};
+  cudf::table_view input{{col1, col2}};
+
+  // 22 rows; (3, 3), (8, 5) and (4, 1) each occur twice, leaving 19 distinct rows.
+  // The count is hard-coded so the test needs no host-side set/transform helpers.
+  EXPECT_EQ(
+    19, cudf::distinct_count(input, cudf::null_equality::EQUAL, cudf::test::get_default_stream()));
+}
+
+// Exercises the column overload of cudf::unique_count with an explicitly supplied stream.
+TEST_F(StreamCompactionTest, UniqueCountColumn)
+{
+  auto const col = int32s_col{1, 3, 3, 4, 31, 1, 8, 2, 0, 4, 1, 4, 10, 40, 31, 42, 0, 42, 8, 5, 4};
+
+  // 21 elements with one adjacent duplicate (3, 3) -> 20 consecutive groups
+  EXPECT_EQ(20,
+            cudf::unique_count(col,
+                               cudf::null_policy::INCLUDE,
+                               cudf::nan_policy::NAN_IS_VALID,
+                               cudf::test::get_default_stream()));
+}
+
+// Exercises the table overload of cudf::unique_count with an explicitly supplied stream.
+TEST_F(StreamCompactionTest, UniqueCountTable)
+{
+  auto const col1 = int32s_col{
+    1, 3, 3, 3, 4, 31, 1, 8, 2, 0, 4, 1, 4, 10, 40, 31, 42, 0, 42, 8, 5, 4};
+  auto const col2 = int32s_col{
+    3, 3, 3, 4, 31, 1, 8, 5, 0, 4, 1, 4, 10, 40, 31, 42, 0, 42, 8, 5, 4, 1};
+  cudf::table_view input{{col1, col2}};
+
+  // 22 rows; only rows 1 and 2, both (3, 3), are adjacent duplicates,
+  // which leaves 21 consecutive groups of equivalent rows.
+  EXPECT_EQ(
+    21, cudf::unique_count(input, cudf::null_equality::EQUAL, cudf::test::get_default_stream()));
+}