Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve performance of quadtree point-to-polyline join #362

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
a5f47c0
implement quadtree_point_to_nearest_polyline with thrust in the style…
trxcllnt Feb 25, 2021
ca2e630
free intermediate local_point_offsets in quadtree_point_in_polygon
trxcllnt Feb 25, 2021
d90ac5f
Merge branch 'branch-0.19' of github.com:rapidsai/cuspatial into fix/…
trxcllnt Mar 31, 2021
b843069
update for set_element_async API change
trxcllnt Mar 31, 2021
e79caa9
add p2np iterator that yields point/polyline/distances in an order co…
trxcllnt Apr 7, 2021
e07483b
clean up p2np includes
trxcllnt Apr 7, 2021
7cf6461
Merge branch 'branch-0.19' of github.com:rapidsai/cuspatial into fix/…
trxcllnt Apr 8, 2021
c061759
use thrust binary searches instead of linear search
trxcllnt Apr 8, 2021
1fa3514
fix GCC 9 RVO warning/error
trxcllnt Apr 9, 2021
cc1cb96
use new Thrust 1.12.0 make_zip_iterator instead of our custom helper
trxcllnt Apr 9, 2021
5a33ca7
update copyrights
trxcllnt Apr 9, 2021
3722176
Merge branch 'branch-0.20' of github.com:rapidsai/cuspatial into fix/…
trxcllnt Apr 9, 2021
081ba35
Merge branch 'fix/thrust-1.12.0' into fix/quadtree-point-to-nearest-p…
trxcllnt Apr 9, 2021
782af8a
scatter reduced point/poly/distances into place
trxcllnt Apr 12, 2021
2cdc33a
deterministically handle case where distances between a point and mul…
trxcllnt Apr 12, 2021
dfe0b3e
Merge branch 'branch-0.20' of github.com:rapidsai/cuspatial into fix/…
trxcllnt Apr 12, 2021
26253fa
use C++17 and cuDF 0.20.*
trxcllnt Apr 13, 2021
e92a56a
update README.md with latest features + markdown lint fixes
trxcllnt Apr 13, 2021
2c16c0a
Merge branch 'fix/use-c++17' into fix/quadtree-point-to-nearest-polyl…
trxcllnt Apr 13, 2021
5e8661a
sort the results of quad bbox join by quad offset
trxcllnt Apr 13, 2021
3a765d3
Merge branch 'branch-0.20' of github.com:rapidsai/cuspatial into fix/…
trxcllnt Apr 13, 2021
d70526e
Merge branch 'branch-0.20' of github.com:rapidsai/cuspatial into fix/…
trxcllnt Apr 14, 2021
e329dd6
fix spatial join python tests for new sorting order
trxcllnt Apr 14, 2021
53adc9e
Merge branch 'branch-0.20' of github.com:rapidsai/cuspatial into fix/…
trxcllnt May 6, 2021
43bbd91
use C++17 structured binding
trxcllnt May 6, 2021
2fb630e
avoid the copies incurred by shrinking the device_uvectors
trxcllnt May 6, 2021
ced903b
remove dead code
trxcllnt May 6, 2021
f523461
call cudaMemsetAsync with an int instead of a float/double
trxcllnt May 6, 2021
1bc89b4
factor common code into point.cuh
trxcllnt May 12, 2021
82f0569
Apply suggestions from code review
trxcllnt May 18, 2021
c663e5e
fix lint
trxcllnt May 18, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions cpp/src/join/detail/point.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <thrust/binary_search.h>
#include <thrust/distance.h>

namespace cuspatial {
namespace detail {

inline __device__ std::pair<uint32_t, uint32_t> get_quad_poly_and_local_point_indices(
uint32_t const global_index, uint32_t const *point_offsets, uint32_t const *point_offsets_end)
{
// Calculate the position in "point_offsets" that `global_index` falls between.
// This position is the index of the poly/quad pair for this `global_index`.
//
// Dereferencing `local_point_offset` yields the zero-based first point position of this
// quadrant. Adding this zero-based position to the quadrant's first point position in the
// quadtree yields the "global" position in the `point_indices` map.
auto const local_point_offset =
thrust::upper_bound(thrust::seq, point_offsets, point_offsets_end, global_index) - 1;
return std::make_pair(
// quad_poly_index
thrust::distance(point_offsets, local_point_offset),
// local_point_index
global_index - *local_point_offset);
}

} // namespace detail
} // namespace cuspatial
139 changes: 71 additions & 68 deletions cpp/src/join/quadtree_point_in_polygon.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
* limitations under the License.
*/

#include "detail/point.cuh"

#include <indexing/construction/detail/utilities.cuh>
#include <utility/point_in_polygon.cuh>

Expand Down Expand Up @@ -45,23 +47,16 @@ namespace {
template <typename T, typename QuadOffsetsIter>
struct compute_poly_and_point_indices {
QuadOffsetsIter quad_point_offsets;
uint32_t const *local_point_offsets;
size_t const num_local_point_offsets;
uint32_t const *point_offsets;
uint32_t const *point_offsets_end;
cudf::column_device_view const poly_indices;
thrust::tuple<uint32_t, uint32_t> __device__ operator()(cudf::size_type const i)
thrust::tuple<uint32_t, uint32_t> __device__ operator()(cudf::size_type const global_index)
{
// Calculate the position in "local_point_offsets" that `i` falls between.
// This position is the index of the poly/quad pair for this `i`.
//
// Dereferencing `local_point_offset` yields the zero-based first point position of this
// quadrant. Adding this zero-based position to the quadrant's first point position in the
// quadtree yields the "global" position in the `point_indices` map.
auto po_begin = local_point_offsets;
auto po_end = local_point_offsets + num_local_point_offsets;
auto const local_point_offset = thrust::upper_bound(thrust::seq, po_begin, po_end, i) - 1;
uint32_t const pairs_idx = thrust::distance(local_point_offsets, local_point_offset);
uint32_t const point_idx = quad_point_offsets[pairs_idx] + (i - *local_point_offset);
uint32_t const poly_idx = poly_indices.element<uint32_t>(pairs_idx);
// uint32_t quad_poly_index, local_point_index;
auto const [quad_poly_index, local_point_index] =
get_quad_poly_and_local_point_indices(global_index, point_offsets, point_offsets_end);
uint32_t const point_idx = quad_point_offsets[quad_poly_index] + local_point_index;
uint32_t const poly_idx = poly_indices.element<uint32_t>(quad_poly_index);
return thrust::make_tuple(poly_idx, point_idx);
}
};
Expand Down Expand Up @@ -111,63 +106,71 @@ struct compute_quadtree_point_in_polygon {
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
{
auto quad_lengths = quadtree.column(3);
auto quad_offsets = quadtree.column(4);
auto poly_indices = poly_quad_pairs.column(0);
auto quad_indices = poly_quad_pairs.column(1);
auto num_poly_quad_pairs = poly_quad_pairs.num_rows();

auto counting_iter = thrust::make_counting_iterator(0);
auto quad_lengths_iter = thrust::make_permutation_iterator(quad_lengths.begin<uint32_t>(),
quad_indices.begin<uint32_t>());

auto quad_offsets_iter = thrust::make_permutation_iterator(quad_offsets.begin<uint32_t>(),
quad_indices.begin<uint32_t>());

// Compute a "local" set of zero-based point offsets from number of points in each quadrant
// Use `num_poly_quad_pairs + 1` as the length so that the last element produced by
// `inclusive_scan` is the total number of points to be tested against any polygon.
rmm::device_uvector<uint32_t> local_point_offsets(num_poly_quad_pairs + 1, stream);

thrust::inclusive_scan(rmm::exec_policy(stream),
quad_lengths_iter,
quad_lengths_iter + num_poly_quad_pairs,
local_point_offsets.begin() + 1);

// Ensure local point offsets starts at 0
uint32_t init{0};
local_point_offsets.set_element_async(0, init, stream);

// The last element is the total number of points to test against any polygon.
auto num_total_points = local_point_offsets.back_element(stream);
// Wrapped in an IIFE so `local_point_offsets` is freed on return
auto [poly_idxs, point_idxs, num_total_points] = [&]() {
auto quad_lengths = quadtree.column(3);
auto quad_offsets = quadtree.column(4);
auto poly_indices = poly_quad_pairs.column(0);
auto quad_indices = poly_quad_pairs.column(1);
auto num_poly_quad_pairs = poly_quad_pairs.num_rows();

auto counting_iter = thrust::make_counting_iterator(0);
auto quad_lengths_iter = thrust::make_permutation_iterator(quad_lengths.begin<uint32_t>(),
quad_indices.begin<uint32_t>());

auto quad_offsets_iter = thrust::make_permutation_iterator(quad_offsets.begin<uint32_t>(),
quad_indices.begin<uint32_t>());

// Compute a "local" set of zero-based point offsets from number of points in each quadrant
// Use `num_poly_quad_pairs + 1` as the length so that the last element produced by
// `inclusive_scan` is the total number of points to be tested against any polygon.
rmm::device_uvector<uint32_t> local_point_offsets(num_poly_quad_pairs + 1, stream);

thrust::inclusive_scan(rmm::exec_policy(stream),
quad_lengths_iter,
quad_lengths_iter + num_poly_quad_pairs,
local_point_offsets.begin() + 1);

// Ensure local point offsets starts at 0
uint32_t init{0};
local_point_offsets.set_element_async(0, init, stream);

// The last element is the total number of points to test against any polygon.
auto num_total_points = local_point_offsets.back_element(stream);
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved

// Allocate memory for the polygon and point index pairs
rmm::device_uvector<uint32_t> poly_idxs(num_total_points, stream);
rmm::device_uvector<uint32_t> point_idxs(num_total_points, stream);

auto poly_and_point_indices =
thrust::make_zip_iterator(poly_idxs.begin(), point_idxs.begin());

// Compute the combination of polygon and point index pairs. For each polygon/quadrant pair,
// enumerate pairs of (poly_index, point_index) for each point in each quadrant.
//
// In Python pseudocode:
// ```
// pp_pairs = []
// for polygon, quadrant in pq_pairs:
// for point in quadrant:
// pp_pairs.append((polygon, point))
// ```
//
thrust::transform(rmm::exec_policy(stream),
counting_iter,
counting_iter + num_total_points,
poly_and_point_indices,
compute_poly_and_point_indices<T, decltype(quad_offsets_iter)>{
quad_offsets_iter,
local_point_offsets.begin(),
local_point_offsets.end(),
*cudf::column_device_view::create(poly_indices, stream)});

// Allocate memory for the polygon and point index pairs
rmm::device_uvector<uint32_t> poly_idxs(num_total_points, stream);
rmm::device_uvector<uint32_t> point_idxs(num_total_points, stream);
return std::make_tuple(std::move(poly_idxs), std::move(point_idxs), num_total_points);
}();

auto poly_and_point_indices = thrust::make_zip_iterator(poly_idxs.begin(), point_idxs.begin());

// Compute the combination of polygon and point index pairs. For each polygon/quadrant pair,
// enumerate pairs of (poly_index, point_index) for each point in each quadrant.
//
// In Python pseudocode:
// ```
// pp_pairs = []
// for polygon, quadrant in pq_pairs:
// for point in quadrant:
// pp_pairs.append((polygon, point))
// ```
//
thrust::transform(rmm::exec_policy(stream),
counting_iter,
counting_iter + num_total_points,
poly_and_point_indices,
compute_poly_and_point_indices<T, decltype(quad_offsets_iter)>{
quad_offsets_iter,
local_point_offsets.begin(),
local_point_offsets.size() - 1,
*cudf::column_device_view::create(poly_indices, stream)});

// Enumerate the point X/Ys using the sorted `point_indices` (from quadtree construction)
auto point_xys_iter = thrust::make_permutation_iterator(
thrust::make_zip_iterator(point_x.begin<T>(), point_y.begin<T>()),
Expand Down
Loading