Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve Hausdorff Many Column Performance #916

Merged
merged 12 commits into from
Apr 11, 2023
12 changes: 8 additions & 4 deletions cpp/include/cuspatial/distance/hausdorff.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#pragma once

#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>

#include <rmm/mr/device/per_device_resource.hpp>
Expand Down Expand Up @@ -70,15 +71,18 @@ namespace cuspatial {
* : 2 : 2 : 0 : : 2 : : :
* +----------+----+-------+ +---------+---+------+
*
* returned as concatenation of columns
* [0 2 4 3 0 2 9 6 0]
* Returns:
* column: [0 4 2 9 0 6 3 2 0]
* table_view: [0 4 2] [9 0 6] [3 2 0]
*
* ```
*
* @param[in] xs: x component of points
* @param[in] ys: y component of points
* @param[in] space_offsets: beginning index of each space, plus the last space's end offset.
*
* @returns Hausdorff distances for each pair of spaces
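* @returns A pair. The first element is the column that owns all of the computed
* hausdorff distances. The second element is a table view with one column per input
* space; each of its columns is a non-owning slice of the first element, so the table
* view is only valid while that column is alive.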
*
* @throw cudf::cuda_error if `xs` and `ys` lengths differ
* @throw cudf::cuda_error if `xs` and `ys` types differ
Expand All @@ -87,7 +91,7 @@ namespace cuspatial {
*
* @note Hausdorff distances are asymmetrical
*/
std::unique_ptr<cudf::column> directed_hausdorff_distance(
std::pair<std::unique_ptr<cudf::column>, cudf::table_view> directed_hausdorff_distance(
isVoid marked this conversation as resolved.
Show resolved Hide resolved
cudf::column_view const& xs,
cudf::column_view const& ys,
cudf::column_view const& space_offsets,
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cuspatial/experimental/detail/hausdorff.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ __global__ void kernel_hausdorff(
}

// determine the output offset for this pair of spaces (LHS, RHS)
Index output_idx = lhs_space_idx * num_spaces + rhs_space_idx;
Index output_idx = rhs_space_idx * num_spaces + lhs_space_idx;

// use atomicMax to find the maximum of the minimum distance calculated for each space pair.
atomicMax(&thrust::raw_reference_cast(*(results + output_idx)),
Expand Down
57 changes: 43 additions & 14 deletions cpp/src/spatial/hausdorff.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,12 @@
#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/detail/copy.hpp>
#include <cudf/detail/utilities/device_atomics.cuh>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/span.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <rmm/cuda_stream_view.hpp>
Expand All @@ -33,27 +36,52 @@
#include <thrust/binary_search.h>
#include <thrust/distance.h>
#include <thrust/fill.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/sequence.h>

#include <memory>
#include <type_traits>

namespace {

/**
 * @brief Split `col` into consecutive, equally sized chunks of `size` elements each.
 *
 * Chunk `i` is a `cudf::column_view` constructed over the data pointer of `col`
 * with an element offset of `size * i`, no null mask, a null count of zero and no
 * children. `col.size() / size` chunks are produced; any remainder elements are
 * not covered by a chunk.
 *
 * @note only applicable to fixed width type.
 * @note only applicable to columns of `size*size`.
 *
 * @tparam T Element type used to access the data of `col`.
 * @param col Column to split. The returned views refer to its data and do not own it.
 * @param size Number of elements in each chunk. Must be non-zero, since it is used
 * as a divisor.
 * @return The chunk views, in order of increasing offset.
 */
template <typename T>
std::vector<cudf::column_view> split_by_size(cudf::column_view const& col, cudf::size_type size)
{
  cudf::size_type const num_splits = col.size() / size;

  std::vector<cudf::column_view> res;
  // The number of chunks is known up front, so allocate once.
  res.reserve(num_splits);
  for (cudf::size_type i = 0; i < num_splits; ++i) {
    res.push_back(cudf::column_view(col.type(), size, col.data<T>(), nullptr, 0, size * i, {}));
  }
  return res;
}

struct hausdorff_functor {
template <typename T, typename... Args>
std::enable_if_t<not std::is_floating_point<T>::value, std::unique_ptr<cudf::column>> operator()(
Args&&...)
std::enable_if_t<not std::is_floating_point<T>::value,
std::pair<std::unique_ptr<cudf::column>, cudf::table_view>>
operator()(Args&&...)
{
CUSPATIAL_FAIL("Non-floating point operation is not supported");
}

template <typename T>
std::enable_if_t<std::is_floating_point<T>::value, std::unique_ptr<cudf::column>> operator()(
cudf::column_view const& xs,
cudf::column_view const& ys,
cudf::column_view const& space_offsets,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
std::enable_if_t<std::is_floating_point<T>::value,
std::pair<std::unique_ptr<cudf::column>, cudf::table_view>>
operator()(cudf::column_view const& xs,
cudf::column_view const& ys,
cudf::column_view const& space_offsets,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto const num_points = static_cast<uint32_t>(xs.size());
auto const num_spaces = static_cast<uint32_t>(space_offsets.size());
Expand All @@ -66,7 +94,7 @@ struct hausdorff_functor {
auto result = cudf::make_fixed_width_column(
cudf::data_type{tid}, num_results, cudf::mask_state::UNALLOCATED, stream, mr);

if (result->size() == 0) { return result; }
if (result->size() == 0) { return {std::move(result), cudf::table_view{}}; }

auto const result_view = result->mutable_view();

Expand All @@ -80,18 +108,19 @@ struct hausdorff_functor {
result_view.begin<T>(),
stream);

return result;
return {std::move(result), cudf::table_view(split_by_size<T>(result->view(), num_spaces))};
}
};

} // namespace

namespace cuspatial {

std::unique_ptr<cudf::column> directed_hausdorff_distance(cudf::column_view const& xs,
cudf::column_view const& ys,
cudf::column_view const& space_offsets,
rmm::mr::device_memory_resource* mr)
std::pair<std::unique_ptr<cudf::column>, cudf::table_view> directed_hausdorff_distance(
cudf::column_view const& xs,
cudf::column_view const& ys,
cudf::column_view const& space_offsets,
rmm::mr::device_memory_resource* mr)
{
CUSPATIAL_EXPECTS(xs.type() == ys.type(), "Inputs `xs` and `ys` must have same type.");
CUSPATIAL_EXPECTS(xs.size() == ys.size(), "Inputs `xs` and `ys` must have same length.");
Expand Down
18 changes: 9 additions & 9 deletions cpp/tests/experimental/spatial/hausdorff_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ TYPED_TEST(HausdorffTest, Simple)
this->template test<cuspatial::vec_2d<TypeParam>, uint32_t>(
{{0, 0}, {1, 0}, {0, 1}, {0, 2}},
{{0, 2}},
{{0.0, static_cast<TypeParam>(std::sqrt(2.0)), 2.0, 0.0}});
{{0.0, 2.0, static_cast<TypeParam>(std::sqrt(2.0)), 0.0}});
}

TYPED_TEST(HausdorffTest, SingleTrajectorySinglePoint)
Expand All @@ -85,7 +85,7 @@ TYPED_TEST(HausdorffTest, SingleTrajectorySinglePoint)
TYPED_TEST(HausdorffTest, TwoShortSpaces)
{
this->template test<cuspatial::vec_2d<TypeParam>, uint32_t>(
{{0, 0}, {5, 12}, {4, 3}}, {{0, 1}}, {{0.0, 5.0, 13.0, 0.0}});
{{0, 0}, {5, 12}, {4, 3}}, {{0, 1}}, {{0.0, 13.0, 5.0, 0.0}});
}

TYPED_TEST(HausdorffTest, TwoShortSpaces2)
Expand All @@ -94,13 +94,13 @@ TYPED_TEST(HausdorffTest, TwoShortSpaces2)
{{1, 1}, {5, 12}, {4, 3}, {2, 8}, {3, 4}, {7, 7}},
{{0, 3, 4}},
{{0.0,
7.0710678118654755,
5.3851648071345037,
5.0000000000000000,
0.0,
4.1231056256176606,
5.0,
7.0710678118654755,
0.0,
5.0990195135927854,
5.3851648071345037,
4.1231056256176606,
0.0}});
}

Expand All @@ -121,12 +121,12 @@ TYPED_TEST(HausdorffTest, ThreeSpacesLengths543)
{4.0, 6.0}},
{{0, 5, 9}},
{{0.0000000000000000,
4.1231056256176606,
4.0000000000000000,
3.6055512754639896,
4.4721359549995796,
4.1231056256176606,
0.0000000000000000,
1.4142135623730951,
4.4721359549995796,
4.0000000000000000,
1.4142135623730951,
0.0000000000000000}});
}
Expand Down
10 changes: 7 additions & 3 deletions cpp/tests/spatial/hausdorff_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/cudf_gtest.hpp>
#include <cudf_test/table_utilities.hpp>
#include <cudf_test/type_lists.hpp>

#include <thrust/iterator/constant_iterator.h>
Expand Down Expand Up @@ -49,11 +50,14 @@ TYPED_TEST(HausdorffTest, Empty)
auto y = cudf::test::fixed_width_column_wrapper<T>({});
auto space_offsets = cudf::test::fixed_width_column_wrapper<uint32_t>({});

auto expected = cudf::test::fixed_width_column_wrapper<T>({});
auto expected_col = cudf::test::fixed_width_column_wrapper<T>({});
auto expected_view = cudf::table_view{};

auto actual = cuspatial::directed_hausdorff_distance(x, y, space_offsets, this->mr());
auto [actual_col, actual_view] =
cuspatial::directed_hausdorff_distance(x, y, space_offsets, this->mr());

expect_columns_equivalent(expected, actual->view(), verbosity);
expect_columns_equivalent(expected_col, actual_col->view(), verbosity);
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_view, actual_view);
}

TYPED_TEST(HausdorffTest, MoreSpacesThanPoints)
Expand Down
8 changes: 8 additions & 0 deletions python/cuspatial/benchmarks/api/bench_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,14 @@ def bench_directed_hausdorff_distance(benchmark, sorted_trajectories):
benchmark(cuspatial.directed_hausdorff_distance, s)


def bench_directed_hausdorff_distance_many_spaces(benchmark):
    """Benchmark directed Hausdorff distance on a large number of spaces.

    The input GeoSeries is built from an all-zero coordinate array of
    length ``2 * num_spaces`` and consecutive integer offsets
    ``0..num_spaces``, then passed to ``directed_hausdorff_distance``.
    """
    num_spaces = 10000
    offsets = cupy.arange(num_spaces + 1, dtype="int32")
    xy = cupy.zeros((num_spaces * 2,))
    multipoints = cuspatial.GeoSeries.from_multipoints_xy(xy, offsets)
    benchmark(cuspatial.directed_hausdorff_distance, multipoints)


def bench_haversine_distance(benchmark, gpu_dataframe):
coords_first = gpu_dataframe["geometry"][0:10].polygons.xy[0:1000]
coords_second = gpu_dataframe["geometry"][10:20].polygons.xy[0:1000]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
# Copyright (c) 2020, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport pair

from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.column.column_view cimport column_view
from cudf._lib.cpp.table.table_view cimport table_view


cdef extern from "cuspatial/distance/hausdorff.hpp" \
namespace "cuspatial" nogil:

cdef unique_ptr[column] directed_hausdorff_distance(
cdef pair[unique_ptr[column], table_view] directed_hausdorff_distance(
const column_view& xs,
const column_view& ys,
const column_view& space_offsets
Expand Down
13 changes: 10 additions & 3 deletions python/cuspatial/cuspatial/_lib/hausdorff.pyx
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# Copyright (c) 2019, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from libcpp.utility cimport move, pair

from cudf._lib.column cimport Column, column, column_view
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.utils cimport columns_from_table_view

from cuspatial._lib.cpp.distance.hausdorff cimport (
directed_hausdorff_distance as directed_cpp_hausdorff_distance,
Expand All @@ -19,7 +21,7 @@ def directed_hausdorff_distance(
cdef column_view c_ys = ys.view()
cdef column_view c_shape_offsets = space_offsets.view()

cdef unique_ptr[column] result
cdef pair[unique_ptr[column], table_view] result

with nogil:
result = move(
Expand All @@ -30,4 +32,9 @@ def directed_hausdorff_distance(
)
)

return Column.from_unique_ptr(move(result))
owner = Column.from_unique_ptr(move(result.first), data_ptr_exposed=True)

return columns_from_table_view(
result.second,
owners=[owner] * result.second.num_columns()
)
9 changes: 3 additions & 6 deletions python/cuspatial/cuspatial/core/spatial/distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ def directed_hausdorff_distance(multipoints: GeoSeries):
1 2.0 0.000000
"""

if len(multipoints) == 0:
num_spaces = len(multipoints)
if num_spaces == 0:
return DataFrame()

if not contains_only_multipoints(multipoints):
Expand All @@ -92,11 +93,7 @@ def directed_hausdorff_distance(multipoints: GeoSeries):
as_column(multipoints.multipoints.geometry_offset[:-1]),
)

num_spaces = len(multipoints)
with cudf.core.buffer.acquire_spill_lock():
result = result.data_array_view(mode="read")
result = result.reshape(num_spaces, num_spaces)
return DataFrame(result)
return DataFrame._from_columns(result, range(num_spaces))


def haversine_distance(p1: GeoSeries, p2: GeoSeries):
Expand Down