From 7d5c45f14816f3fb7451bdb3246c54da448bc708 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 9 Feb 2023 21:16:36 -0800 Subject: [PATCH 1/6] initial --- cpp/include/cuspatial/distance/hausdorff.hpp | 3 +- cpp/src/spatial/hausdorff.cu | 38 ++++++++++++------- cpp/tests/spatial/hausdorff_test.cpp | 10 +++-- .../cuspatial/_lib/cpp/distance/hausdorff.pxd | 4 +- python/cuspatial/cuspatial/_lib/hausdorff.pyx | 13 +++++-- .../cuspatial/core/spatial/distance.py | 6 +-- 6 files changed, 48 insertions(+), 26 deletions(-) diff --git a/cpp/include/cuspatial/distance/hausdorff.hpp b/cpp/include/cuspatial/distance/hausdorff.hpp index 44d829061..00e33f38a 100644 --- a/cpp/include/cuspatial/distance/hausdorff.hpp +++ b/cpp/include/cuspatial/distance/hausdorff.hpp @@ -16,6 +16,7 @@ #pragma once +#include #include #include @@ -87,7 +88,7 @@ namespace cuspatial { * * @note Hausdorff distances are asymmetrical */ -std::unique_ptr directed_hausdorff_distance( +std::pair, cudf::table_view> directed_hausdorff_distance( cudf::column_view const& xs, cudf::column_view const& ys, cudf::column_view const& space_offsets, diff --git a/cpp/src/spatial/hausdorff.cu b/cpp/src/spatial/hausdorff.cu index 509d6e070..4fa4699de 100644 --- a/cpp/src/spatial/hausdorff.cu +++ b/cpp/src/spatial/hausdorff.cu @@ -22,7 +22,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -33,6 +35,7 @@ #include #include #include +#include #include #include @@ -41,19 +44,21 @@ namespace { struct hausdorff_functor { template - std::enable_if_t::value, std::unique_ptr> operator()( - Args&&...) + std::enable_if_t::value, + std::pair, cudf::table_view>> + operator()(Args&&...) { CUSPATIAL_FAIL("Non-floating point operation is not supported"); } template - std::enable_if_t::value, std::unique_ptr> operator()( - cudf::column_view const& xs, - cudf::column_view const& ys, - cudf::column_view const& space_offsets, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + std::enable_if_t::value, + std::pair, cudf::table_view>> + operator()(cudf::column_view const& xs, + cudf::column_view const& ys, + cudf::column_view const& space_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto const num_points = static_cast(xs.size()); auto const num_spaces = static_cast(space_offsets.size()); @@ -66,7 +71,7 @@ struct hausdorff_functor { auto result = cudf::make_fixed_width_column( cudf::data_type{tid}, num_results, cudf::mask_state::UNALLOCATED, stream, mr); - if (result->size() == 0) { return result; } + if (result->size() == 0) { return {std::move(result), cudf::table_view{}}; } auto const result_view = result->mutable_view(); @@ -80,7 +85,11 @@ struct hausdorff_functor { result_view.begin(), stream); - return result; + thrust::host_vector splits(num_spaces - 1); + thrust::sequence(thrust::host, splits.begin(), splits.end(), num_spaces, num_spaces); + + return {std::move(result), + cudf::table_view(cudf::detail::split(result->view(), splits, stream))}; } }; @@ -88,10 +97,11 @@ struct hausdorff_functor { namespace cuspatial { -std::unique_ptr directed_hausdorff_distance(cudf::column_view const& xs, - cudf::column_view const& ys, - cudf::column_view const& space_offsets, - rmm::mr::device_memory_resource* mr) +std::pair, cudf::table_view> directed_hausdorff_distance( + cudf::column_view const& xs, + cudf::column_view const& ys, + cudf::column_view const& space_offsets, + rmm::mr::device_memory_resource* mr) { CUSPATIAL_EXPECTS(xs.type() == ys.type(), "Inputs `xs` and `ys` must have same type."); CUSPATIAL_EXPECTS(xs.size() == ys.size(), "Inputs `xs` and `ys` must have same length."); diff --git a/cpp/tests/spatial/hausdorff_test.cpp b/cpp/tests/spatial/hausdorff_test.cpp index 9a165a652..2f005e39f 100644 --- a/cpp/tests/spatial/hausdorff_test.cpp +++ b/cpp/tests/spatial/hausdorff_test.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -49,11 +50,14 @@ TYPED_TEST(HausdorffTest, Empty) auto y = cudf::test::fixed_width_column_wrapper({}); auto space_offsets = cudf::test::fixed_width_column_wrapper({}); - auto expected = cudf::test::fixed_width_column_wrapper({}); + auto expected_col = cudf::test::fixed_width_column_wrapper({}); + auto expected_view = cudf::table_view{}; - auto actual = cuspatial::directed_hausdorff_distance(x, y, space_offsets, this->mr()); + auto [actual_col, actual_view] = + cuspatial::directed_hausdorff_distance(x, y, space_offsets, this->mr()); - expect_columns_equivalent(expected, actual->view(), verbosity); + expect_columns_equivalent(expected_col, actual_col->view(), verbosity); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_view, actual_view); } TYPED_TEST(HausdorffTest, MoreSpacesThanPoints) diff --git a/python/cuspatial/cuspatial/_lib/cpp/distance/hausdorff.pxd b/python/cuspatial/cuspatial/_lib/cpp/distance/hausdorff.pxd index 6f5fc69e3..6b6f81d5c 100644 --- a/python/cuspatial/cuspatial/_lib/cpp/distance/hausdorff.pxd +++ b/python/cuspatial/cuspatial/_lib/cpp/distance/hausdorff.pxd @@ -1,15 +1,17 @@ # Copyright (c) 2020, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr +from libcpp.utility cimport pair from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view +from cudf._lib.cpp.table.table_view cimport table_view cdef extern from "cuspatial/distance/hausdorff.hpp" \ namespace "cuspatial" nogil: - cdef unique_ptr[column] directed_hausdorff_distance( + cdef pair[unique_ptr[column], table_view] directed_hausdorff_distance( const column_view& xs, const column_view& ys, const column_view& space_offsets diff --git a/python/cuspatial/cuspatial/_lib/hausdorff.pyx b/python/cuspatial/cuspatial/_lib/hausdorff.pyx index 7465efc7a..caa5121d0 100644 --- a/python/cuspatial/cuspatial/_lib/hausdorff.pyx +++ b/python/cuspatial/cuspatial/_lib/hausdorff.pyx @@ -1,9 +1,11 @@ # Copyright (c) 2019, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move +from libcpp.utility cimport move, pair from cudf._lib.column cimport Column, column, column_view +from cudf._lib.cpp.table.table_view cimport table_view +from cudf._lib.utils cimport columns_from_table_view from cuspatial._lib.cpp.distance.hausdorff cimport ( directed_hausdorff_distance as directed_cpp_hausdorff_distance, @@ -19,7 +21,7 @@ def directed_hausdorff_distance( cdef column_view c_ys = ys.view() cdef column_view c_shape_offsets = space_offsets.view() - cdef unique_ptr[column] result + cdef pair[unique_ptr[column], table_view] result with nogil: result = move( @@ -30,4 +32,9 @@ def directed_hausdorff_distance( ) ) - return Column.from_unique_ptr(move(result)) + owner = Column.from_unique_ptr(move(result.first), data_ptr_exposed=True) + + return columns_from_table_view( + result.second, + owners=[owner] * result.second.num_columns() + ) diff --git a/python/cuspatial/cuspatial/core/spatial/distance.py b/python/cuspatial/cuspatial/core/spatial/distance.py index f789e4425..fd3aa7821 100644 --- a/python/cuspatial/cuspatial/core/spatial/distance.py +++ b/python/cuspatial/cuspatial/core/spatial/distance.py @@ -91,10 +91,8 @@ def directed_hausdorff_distance(xs, ys, space_offsets): ys, as_column(space_offsets, dtype="uint32"), ) - with cudf.core.buffer.acquire_spill_lock(): - result = result.data_array_view(mode="read") - result = result.reshape(num_spaces, num_spaces) - return DataFrame(result) + + return DataFrame._from_columns(result, range(num_spaces)).T def haversine_distance(p1_lon, p1_lat, p2_lon, p2_lat): From 8996586f08341c1b58079d5c693820f4aa09b6d3 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Fri, 24 Feb 2023 13:45:10 -0800 Subject: [PATCH 2/6] update with transpose order --- cpp/include/cuspatial/experimental/detail/hausdorff.cuh | 2 +- python/cuspatial/cuspatial/core/spatial/distance.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/include/cuspatial/experimental/detail/hausdorff.cuh b/cpp/include/cuspatial/experimental/detail/hausdorff.cuh index 8e173b336..f4ea971af 100644 --- a/cpp/include/cuspatial/experimental/detail/hausdorff.cuh +++ b/cpp/include/cuspatial/experimental/detail/hausdorff.cuh @@ -125,7 +125,7 @@ __global__ void kernel_hausdorff( } // determine the output offset for this pair of spaces (LHS, RHS) - Index output_idx = lhs_space_idx * num_spaces + rhs_space_idx; + Index output_idx = rhs_space_idx * num_spaces + lhs_space_idx; // use atomicMax to find the maximum of the minimum distance calculated for each space pair. atomicMax(&thrust::raw_reference_cast(*(results + output_idx)), diff --git a/python/cuspatial/cuspatial/core/spatial/distance.py b/python/cuspatial/cuspatial/core/spatial/distance.py index 271f0ecf2..e0fc12f4a 100644 --- a/python/cuspatial/cuspatial/core/spatial/distance.py +++ b/python/cuspatial/cuspatial/core/spatial/distance.py @@ -77,7 +77,8 @@ def directed_hausdorff_distance(multipoints: GeoSeries): 1 2.0 0.000000 """ - if len(multipoints) == 0: + num_spaces = len(multipoints) + if num_spaces == 0: return DataFrame() if not contains_only_multipoints(multipoints): @@ -89,7 +90,7 @@ def directed_hausdorff_distance(multipoints: GeoSeries): as_column(multipoints.multipoints.geometry_offset[:-1]), ) - return DataFrame._from_columns(result, range(num_spaces)).T + return DataFrame._from_columns(result, range(num_spaces)) def haversine_distance(p1: GeoSeries, p2: GeoSeries): From 7d929992a9f0052aa7547f070ca06baf050a08b0 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Fri, 24 Feb 2023 14:31:45 -0800 Subject: [PATCH 3/6] hand roll split logic --- cpp/src/spatial/hausdorff.cu | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/cpp/src/spatial/hausdorff.cu b/cpp/src/spatial/hausdorff.cu index 4fa4699de..02940db64 100644 --- a/cpp/src/spatial/hausdorff.cu +++ b/cpp/src/spatial/hausdorff.cu @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,7 @@ #include #include #include +#include #include #include @@ -42,6 +44,27 @@ namespace { +/** + * @brief Split `col` into equal size chunks, each has `size`. + * + * @note only applicable to fixed width type. + * @note only applicable to columns of `size*size`. + */ +template +std::vector split_by_size(cudf::column_view const& col, cudf::size_type size) +{ + std::vector res; + cudf::size_type num_splits = col.size() / size; + std::transform(thrust::counting_iterator(0), + thrust::counting_iterator(num_splits), + std::back_inserter(res), + [size, num_splits, &col](int i) { + return cudf::column_view( + col.type(), size, col.data(), nullptr, 0, size * i, {}); + }); + return res; +} + struct hausdorff_functor { template std::enable_if_t::value, @@ -85,11 +108,7 @@ struct hausdorff_functor { result_view.begin(), stream); - thrust::host_vector splits(num_spaces - 1); - thrust::sequence(thrust::host, splits.begin(), splits.end(), num_spaces, num_spaces); - - return {std::move(result), - cudf::table_view(cudf::detail::split(result->view(), splits, stream))}; + return {std::move(result), cudf::table_view(split_by_size(result->view(), num_spaces))}; } }; From 401965b8d49d45461f4d2d48d59f47bc405e24f1 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Fri, 24 Feb 2023 14:39:22 -0800 Subject: [PATCH 4/6] doc --- cpp/include/cuspatial/distance/hausdorff.hpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cpp/include/cuspatial/distance/hausdorff.hpp b/cpp/include/cuspatial/distance/hausdorff.hpp index 00e33f38a..27f4aa600 100644 --- a/cpp/include/cuspatial/distance/hausdorff.hpp +++ b/cpp/include/cuspatial/distance/hausdorff.hpp @@ -71,15 +71,18 @@ namespace cuspatial { * : 2 : 2 : 0 : : 2 : : : * +----------+----+-------+ +---------+---+------+ * - * returned as concatenation of columns - * [0 2 4 3 0 2 9 6 0] + * Returns: + * column: [0 4 2 9 0 6 3 2 0] + * table_view: [0 4 2] [9 0 6] [3 2 0] + * * ``` * * @param[in] xs: x component of points * @param[in] ys: y component of points * @param[in] space_offsets: beginning index of each space, plus the last space's end offset. * - * @returns Hausdorff distances for each pair of spaces + * @returns An owning object of the result of the hausdorff distances. + * A table view containing the split view for each input space. * * @throw cudf::cuda_error if `xs` and `ys` lengths differ * @throw cudf::cuda_error if `xs` and `ys` types differ From d2c4706e51b7c607cc96733e996187d87f5c8c82 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Fri, 24 Feb 2023 14:50:52 -0800 Subject: [PATCH 5/6] update tests --- .../experimental/spatial/hausdorff_test.cu | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cpp/tests/experimental/spatial/hausdorff_test.cu b/cpp/tests/experimental/spatial/hausdorff_test.cu index c7b10ea7b..a1d668760 100644 --- a/cpp/tests/experimental/spatial/hausdorff_test.cu +++ b/cpp/tests/experimental/spatial/hausdorff_test.cu @@ -71,7 +71,7 @@ TYPED_TEST(HausdorffTest, Simple) this->template test, uint32_t>( {{0, 0}, {1, 0}, {0, 1}, {0, 2}}, {{0, 2}}, - {{0.0, static_cast(std::sqrt(2.0)), 2.0, 0.0}}); + {{0.0, 2.0, static_cast(std::sqrt(2.0)), 0.0}}); } TYPED_TEST(HausdorffTest, SingleTrajectorySinglePoint) @@ -85,7 +85,7 @@ TYPED_TEST(HausdorffTest, SingleTrajectorySinglePoint) TYPED_TEST(HausdorffTest, TwoShortSpaces) { this->template test, uint32_t>( - {{0, 0}, {5, 12}, {4, 3}}, {{0, 1}}, {{0.0, 5.0, 13.0, 0.0}}); + {{0, 0}, {5, 12}, {4, 3}}, {{0, 1}}, {{0.0, 13.0, 5.0, 0.0}}); } TYPED_TEST(HausdorffTest, TwoShortSpaces2) @@ -94,13 +94,13 @@ TYPED_TEST(HausdorffTest, TwoShortSpaces2) {{1, 1}, {5, 12}, {4, 3}, {2, 8}, {3, 4}, {7, 7}}, {{0, 3, 4}}, {{0.0, - 7.0710678118654755, - 5.3851648071345037, 5.0000000000000000, - 0.0, - 4.1231056256176606, 5.0, + 7.0710678118654755, + 0.0, 5.0990195135927854, + 5.3851648071345037, + 4.1231056256176606, 0.0}}); } @@ -121,12 +121,12 @@ TYPED_TEST(HausdorffTest, ThreeSpacesLengths543) {4.0, 6.0}}, {{0, 5, 9}}, {{0.0000000000000000, - 4.1231056256176606, - 4.0000000000000000, 3.6055512754639896, + 4.4721359549995796, + 4.1231056256176606, 0.0000000000000000, 1.4142135623730951, - 4.4721359549995796, + 4.0000000000000000, 1.4142135623730951, 0.0000000000000000}}); } From ccc70c5552b85398a0dbd7fa9e759a265fcbec97 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 10 Apr 2023 18:33:07 -0700 Subject: [PATCH 6/6] add many column benchmark --- python/cuspatial/benchmarks/api/bench_api.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/cuspatial/benchmarks/api/bench_api.py b/python/cuspatial/benchmarks/api/bench_api.py index 771e27f3f..ccc899f14 100644 --- a/python/cuspatial/benchmarks/api/bench_api.py +++ b/python/cuspatial/benchmarks/api/bench_api.py @@ -107,6 +107,14 @@ def bench_directed_hausdorff_distance(benchmark, sorted_trajectories): benchmark(cuspatial.directed_hausdorff_distance, s) +def bench_directed_hausdorff_distance_many_spaces(benchmark): + spaces = 10000 + coords = cupy.zeros((spaces * 2,)) + offsets = cupy.arange(spaces + 1, dtype="int32") + s = cuspatial.GeoSeries.from_multipoints_xy(coords, offsets) + benchmark(cuspatial.directed_hausdorff_distance, s) + + def bench_haversine_distance(benchmark, gpu_dataframe): coords_first = gpu_dataframe["geometry"][0:10].polygons.xy[0:1000] coords_second = gpu_dataframe["geometry"][10:20].polygons.xy[0:1000]