Skip to content

Commit

Permalink
Reimplement cudf::merge for nested types without using comparators (#…
Browse files Browse the repository at this point in the history
…14250)

Part of #11844 

This PR also uses new experimental comparators for non-nested types by introducing a new device constructor for `cudf::experimental::row::lexicographic::device_row_comparator`. In the case of non-nested types, preprocessing can be skipped so comparators can be created on the fly. This solution helps us avoid creating 3 comparator types because `thrust::merge` can call the operator with indices from either side of the table.

Furthermore, the PR reworks `cudf/detail/merge.cuh` by removing any CUDA headers/components to expose a true detail API of the form `cudf/detail/merge.hpp`.

[Benchmark comparison for non-nested types](#14250 (comment))

Compilation time increases from ~6 mins to ~7 mins.

Authors:
  - Divye Gala (https://github.com/divyegala)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - MithunR (https://github.com/mythrocks)

URL: #14250
  • Loading branch information
divyegala authored Oct 28, 2023
1 parent 751370e commit 2bc454a
Show file tree
Hide file tree
Showing 17 changed files with 525 additions and 186 deletions.
1 change: 1 addition & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ ConfigureNVBench(HASHING_NVBENCH hashing/hash.cpp)
# ##################################################################################################
# * merge benchmark -------------------------------------------------------------------------------
ConfigureBench(MERGE_BENCH merge/merge.cpp)
ConfigureNVBench(MERGE_NVBENCH merge/merge_structs.cpp merge/merge_lists.cpp)

# ##################################################################################################
# * null_mask benchmark ---------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#pragma once

#include <benchmarks/common/generate_input.hpp>
#include "generate_input.hpp"

#include <cudf_test/column_wrapper.hpp>

Expand Down
54 changes: 54 additions & 0 deletions cpp/benchmarks/merge/merge_lists.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_nested_types.hpp>

#include <cudf/detail/merge.hpp>
#include <cudf/detail/sorting.hpp>

#include <nvbench/nvbench.cuh>

/**
 * @brief nvbench driver that times `cudf::detail::merge` on two pre-sorted inputs.
 *
 * Both inputs are produced by `create_lists_data(state)` and sorted once, outside the measured
 * region. Only the merge call sits inside `state.exec`.
 *
 * @param state nvbench state, forwarded to `create_lists_data` and used to run the measurement
 */
void nvbench_merge_list(nvbench::state& state)
{
  rmm::cuda_stream_view setup_stream;

  // Sorts a generated table on the setup stream with default column order / null precedence.
  auto const sort_rows = [&](auto const& unsorted) {
    return cudf::detail::sort(
      *unsorted, {}, {}, setup_stream, rmm::mr::get_current_device_resource());
  };

  // The unsorted inputs stay alive until the end of the function, as the sorts are only
  // guaranteed complete after the synchronize below.
  auto const lhs_unsorted = create_lists_data(state);
  auto const lhs_sorted   = sort_rows(lhs_unsorted);

  auto const rhs_unsorted = create_lists_data(state);
  auto const rhs_sorted   = sort_rows(rhs_unsorted);

  // Make sure all preparation work has finished before timing starts.
  setup_stream.synchronize();

  auto const timed_merge = [&](nvbench::launch& launch) {
    auto const launch_stream = rmm::cuda_stream_view{launch.get_stream()};

    // Merge on key column 0 in ascending order; null precedence is left empty.
    cudf::detail::merge({*lhs_sorted, *rhs_sorted},
                        {0},
                        {cudf::order::ASCENDING},
                        {},
                        launch_stream,
                        rmm::mr::get_current_device_resource());
  };

  state.exec(nvbench::exec_tag::sync, timed_merge);
}

// Registers `nvbench_merge_list` with nvbench under the name "merge_lists" and declares three
// parameter axes:
//   - "size_bytes":     power-of-two axis with exponents 10, 18, 24 and 28
//   - "depth":          1 and 4
//   - "null_frequency": 0 and 0.2
// NOTE(review): the benchmark body never reads these axes directly; they are presumably consumed
// by `create_lists_data(state)` -- confirm the axis names match what that helper looks up.
NVBENCH_BENCH(nvbench_merge_list)
.set_name("merge_lists")
.add_int64_power_of_two_axis("size_bytes", {10, 18, 24, 28})
.add_int64_axis("depth", {1, 4})
.add_float64_axis("null_frequency", {0, 0.2});
54 changes: 54 additions & 0 deletions cpp/benchmarks/merge/merge_structs.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_nested_types.hpp>

#include <cudf/detail/merge.hpp>
#include <cudf/detail/sorting.hpp>

#include <nvbench/nvbench.cuh>

/**
 * @brief nvbench driver that times `cudf::detail::merge` on two pre-sorted inputs.
 *
 * Both inputs are produced by `create_structs_data(state)` and sorted once, outside the measured
 * region. Only the merge call sits inside `state.exec`.
 *
 * @param state nvbench state, forwarded to `create_structs_data` and used to run the measurement
 */
void nvbench_merge_struct(nvbench::state& state)
{
  rmm::cuda_stream_view prep_stream;

  // Sorts a generated table on the preparation stream with default order / null precedence.
  auto const sorted_copy_of = [&](auto const& generated) {
    return cudf::detail::sort(
      *generated, {}, {}, prep_stream, rmm::mr::get_current_device_resource());
  };

  // The generated inputs are kept alive for the whole function, as the sorts are only
  // guaranteed complete after the synchronize below.
  auto const first_raw    = create_structs_data(state);
  auto const first_sorted = sorted_copy_of(first_raw);

  auto const second_raw    = create_structs_data(state);
  auto const second_sorted = sorted_copy_of(second_raw);

  // All preparation work must be finished before the timed region begins.
  prep_stream.synchronize();

  auto const measured = [&](nvbench::launch& launch) {
    auto const bench_stream = rmm::cuda_stream_view{launch.get_stream()};

    // Merge on key column 0 in ascending order; null precedence is left empty.
    cudf::detail::merge({*first_sorted, *second_sorted},
                        {0},
                        {cudf::order::ASCENDING},
                        {},
                        bench_stream,
                        rmm::mr::get_current_device_resource());
  };

  state.exec(nvbench::exec_tag::sync, measured);
}

// Registers `nvbench_merge_struct` with nvbench under the name "merge_struct" and declares three
// parameter axes:
//   - "NumRows": power-of-two axis with exponents 10, 18 and 26
//   - "Depth":   0, 1 and 8
//   - "Nulls":   0 and 1
// NOTE(review): the benchmark body never reads these axes directly; they are presumably consumed
// by `create_structs_data(state)` -- confirm the axis names match what that helper looks up.
NVBENCH_BENCH(nvbench_merge_struct)
.set_name("merge_struct")
.add_int64_power_of_two_axis("NumRows", {10, 18, 26})
.add_int64_axis("Depth", {0, 1, 8})
.add_int64_axis("Nulls", {0, 1});
3 changes: 2 additions & 1 deletion cpp/benchmarks/sort/rank_lists.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@
* limitations under the License.
*/

#include "nested_types_common.hpp"
#include "rank_types_common.hpp"

#include <benchmarks/common/generate_nested_types.hpp>

#include <cudf/sorting.hpp>

#include <cudf_test/column_utilities.hpp>
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/sort/rank_structs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
* limitations under the License.
*/

#include "nested_types_common.hpp"
#include "rank_types_common.hpp"
#include <benchmarks/common/generate_nested_types.hpp>

#include <cudf/sorting.hpp>

Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/sort/sort_lists.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "nested_types_common.hpp"
#include <benchmarks/common/generate_nested_types.hpp>

#include <cudf/detail/sorting.hpp>

Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/sort/sort_structs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "nested_types_common.hpp"
#include <benchmarks/common/generate_nested_types.hpp>

#include <cudf/detail/sorting.hpp>

Expand Down
166 changes: 0 additions & 166 deletions cpp/include/cudf/detail/merge.cuh

This file was deleted.

60 changes: 60 additions & 0 deletions cpp/include/cudf/detail/merge.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <rmm/device_uvector.hpp>

#include <thrust/pair.h>

namespace cudf::detail {

/**
 * @brief Identifies which source table (left or right) a row is copied from.
 */
enum class side : bool { LEFT, RIGHT };

/**
 * @brief Row index tagged with its source table.
 *
 * `thrust::get<0>` yields the left/right side and `thrust::get<1>` yields the row index.
 */
using index_type = thrust::pair<side, cudf::size_type>;

/**
 * @brief `rmm::device_uvector` whose elements are `index_type` values.
 */
using index_vector = rmm::device_uvector<index_type>;

/**
 * @copydoc std::unique_ptr<cudf::table> merge(
 *            std::vector<table_view> const& tables_to_merge,
 *            std::vector<cudf::size_type> const& key_cols,
 *            std::vector<cudf::order> const& column_order,
 *            std::vector<cudf::null_order> const& null_precedence,
 *            rmm::mr::device_memory_resource* mr)
 *
 * @param stream CUDA stream used for device memory operations and kernel launches
 */
std::unique_ptr<cudf::table> merge(std::vector<table_view> const& tables_to_merge,
                                   std::vector<cudf::size_type> const& key_cols,
                                   std::vector<cudf::order> const& column_order,
                                   std::vector<cudf::null_order> const& null_precedence,
                                   rmm::cuda_stream_view stream,
                                   rmm::mr::device_memory_resource* mr);

}  // namespace cudf::detail
Loading

0 comments on commit 2bc454a

Please sign in to comment.