From a5f47c05f954c04de2b8d4abbc2cfac69077365b Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 25 Feb 2021 00:41:18 -0600 Subject: [PATCH 01/22] implement quadtree_point_to_nearest_polyline with thrust in the style of quadtree_point_in_polygon to get a 5x perf boost --- .../quadtree_point_to_nearest_polyline.cu | 308 +++++++++--------- cpp/src/utility/point_to_nearest_polyline.cuh | 61 ++++ 2 files changed, 216 insertions(+), 153 deletions(-) create mode 100644 cpp/src/utility/point_to_nearest_polyline.cuh diff --git a/cpp/src/join/quadtree_point_to_nearest_polyline.cu b/cpp/src/join/quadtree_point_to_nearest_polyline.cu index 8d9c025b9..a9fa2c015 100644 --- a/cpp/src/join/quadtree_point_to_nearest_polyline.cu +++ b/cpp/src/join/quadtree_point_to_nearest_polyline.cu @@ -15,11 +15,11 @@ */ #include +#include #include #include -#include #include #include #include @@ -30,99 +30,53 @@ #include #include -#include -#include +#include +#include +#include #include -#include +#include #include - -#include +#include +#include namespace cuspatial { namespace detail { namespace { -constexpr uint32_t threads_per_block = 256; - -template -__global__ void find_nearest_polyline_kernel( - uint32_t const *quad_idxs, // point quadrant id array -base 0 - uint32_t const num_poly_idx_offsets, // number of polyline index offsets - uint32_t const *poly_idx_offsets, // starting positions of the first polyline idx - uint32_t const num_poly_idxs, // number of polyline indices - uint32_t const *poly_idxs, - - cudf::column_device_view const quad_lengths, // numbers of points in each quadrant - cudf::column_device_view const quad_offsets, // offset of first point in each quadrant - cudf::column_device_view const point_indices, - cudf::column_device_view const point_x, - cudf::column_device_view const point_y, - - cudf::column_device_view const poly_offsets, // positions of the first vertex in a polyline - cudf::column_device_view const poly_points_x, - cudf::column_device_view const poly_points_y, - - cudf::mutable_column_device_view out_point_index, - cudf::mutable_column_device_view out_poly_index, - cudf::mutable_column_device_view out_distance) -{ - // each block processes a quadrant - auto const quad_pos = blockIdx.x + gridDim.x * blockIdx.y; - auto const quad_idx = quad_idxs[quad_pos]; - - auto const poly_begin = poly_idx_offsets[quad_pos]; - auto const poly_end = - quad_pos < num_poly_idx_offsets - 1 ? poly_idx_offsets[quad_pos + 1] : num_poly_idxs; - - auto const num_points = quad_lengths.element(quad_idx); - - for (auto tid = threadIdx.x; tid < num_points; tid += blockDim.x) { - // each thread loads its point - auto const point_pos = quad_offsets.element(quad_idx) + tid; - auto const point_id = point_indices.element(point_pos); - auto const px = point_x.element(point_id); - auto const py = point_y.element(point_id); - auto nearest_poly_id = static_cast(-1); - T distance_squared = std::numeric_limits::max(); - - for (uint32_t poly_id = poly_begin; poly_id < poly_end; poly_id++) // for each polyline - { - auto const poly_idx = poly_idxs[poly_id]; - auto const ring_begin = poly_offsets.element(poly_idx); - auto const ring_end = poly_idx < poly_offsets.size() - 1 - ? poly_offsets.element(poly_idx + 1) - : poly_points_x.size(); - auto const ring_len = ring_end - ring_begin; - - for (auto point_idx = 0; point_idx < ring_len; point_idx++) // for each line - { - auto const i0 = ring_begin + ((point_idx + 0) % ring_len); - auto const i1 = ring_begin + ((point_idx + 1) % ring_len); - auto const x0 = poly_points_x.element(i0); - auto const y0 = poly_points_y.element(i0); - auto const x1 = poly_points_x.element(i1); - auto const y1 = poly_points_y.element(i1); - auto const dx0 = px - x0, dy0 = py - y0; - auto const dx1 = px - x1, dy1 = py - y1; - auto const dx2 = x1 - x0, dy2 = y1 - y0; - auto const d0 = dx0 * dx0 + dy0 * dy0; - auto const d1 = dx1 * dx1 + dy1 * dy1; - auto const d2 = dx2 * dx2 + dy2 * dy2; - auto const d3 = dx2 * dx0 + dy2 * dy0; - auto const r = d3 * d3 / d2; - auto const d = d3 <= 0 || r >= d2 ? min(d0, d1) : d0 - r; - if (d < distance_squared) { - distance_squared = d; - nearest_poly_id = poly_idx; - } - } - } - - out_point_index.element(point_pos) = point_pos; - out_poly_index.element(point_pos) = nearest_poly_id; - out_distance.element(point_pos) = sqrt(distance_squared); +template +struct compute_point_poly_indices_and_distances { + PointIter points; + QuadOffsetsIter quad_point_offsets; + uint32_t const *local_point_offsets; + size_t const num_local_point_offsets; + cudf::column_device_view const poly_indices; + cudf::column_device_view const poly_offsets; + cudf::column_device_view const poly_points_x; + cudf::column_device_view const poly_points_y; + thrust::tuple __device__ operator()(cudf::size_type const i) + { + // Calculate the position in "local_point_offsets" that `i` falls between. + // This position is the index of the poly/quad pair for this `i`. + // + // Dereferencing `local_point_offset` yields the zero-based first point position of this + // quadrant. Adding this zero-based position to the quadrant's first point position in the + // quadtree yields the "global" position in the `point_indices` map. + auto po_begin = local_point_offsets; + auto po_end = local_point_offsets + num_local_point_offsets; + auto const local_point_offset = thrust::upper_bound(thrust::seq, po_begin, po_end, i) - 1; + uint32_t const pairs_idx = thrust::distance(local_point_offsets, local_point_offset); + uint32_t const point_idx = quad_point_offsets[pairs_idx] + (i - *local_point_offset); + uint32_t const poly_idx = poly_indices.element(pairs_idx); + auto const &point = points[point_idx]; + auto const distance = point_to_poly_line_distance(thrust::get<0>(point), + thrust::get<1>(point), + poly_idx, + poly_offsets, + poly_points_x, + poly_points_y); + return thrust::make_tuple(point_idx, poly_idx, distance); } -} +}; struct compute_quadtree_point_to_nearest_polyline { template @@ -145,74 +99,122 @@ struct compute_quadtree_point_to_nearest_polyline { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { - auto poly_idx = poly_quad_pairs.column(0); - auto quad_idx = poly_quad_pairs.column(1); - auto quad_lengths = quadtree.column(3); - auto quad_offsets = quadtree.column(4); - const uint32_t num_pairs = poly_idx.size(); - - rmm::device_uvector d_poly_idx(num_pairs, stream); - - thrust::copy(rmm::exec_policy(stream), - poly_idx.begin(), - poly_idx.end(), - d_poly_idx.begin()); - - rmm::device_uvector d_quad_idx(num_pairs, stream); - - thrust::copy(rmm::exec_policy(stream), - quad_idx.begin(), - quad_idx.end(), - d_quad_idx.begin()); - - // sort (d_poly_idx, d_quad_idx) using d_quad_idx as key => (quad_idxs, poly_idxs) - thrust::sort_by_key( - rmm::exec_policy(stream), d_quad_idx.begin(), d_quad_idx.end(), d_poly_idx.begin()); - - // reduce_by_key using d_quad_idx as the key - // exclusive_scan on numbers of polygons associated with a quadrant to create d_poly_idx_offsets - - rmm::device_uvector d_poly_idx_offsets(num_pairs, stream); - - uint32_t num_quads = - thrust::distance(d_poly_idx_offsets.begin(), - thrust::reduce_by_key(rmm::exec_policy(stream), - d_quad_idx.begin(), - d_quad_idx.end(), - thrust::constant_iterator(1), - d_quad_idx.begin(), - d_poly_idx_offsets.begin()) - .second); - - d_quad_idx.resize(num_quads, stream); - d_poly_idx_offsets.resize(num_quads, stream); - - thrust::exclusive_scan(rmm::exec_policy(stream), - d_poly_idx_offsets.begin(), - d_poly_idx_offsets.end(), - d_poly_idx_offsets.begin()); - + // Wrapped in an IIFE so `local_point_offsets` is freed on return + auto poly_point_idxs_and_distances = [&]() { + auto quad_lengths = quadtree.column(3); + auto quad_offsets = quadtree.column(4); + auto poly_indices = poly_quad_pairs.column(0); + auto quad_indices = poly_quad_pairs.column(1); + auto num_poly_quad_pairs = poly_quad_pairs.num_rows(); + + auto counting_iter = thrust::make_counting_iterator(0); + auto quad_lengths_iter = thrust::make_permutation_iterator(quad_lengths.begin(), + quad_indices.begin()); + + auto quad_offsets_iter = thrust::make_permutation_iterator(quad_offsets.begin(), + quad_indices.begin()); + + // Compute a "local" set of zero-based point offsets from number of points in each quadrant + // Use `num_poly_quad_pairs + 1` as the length so that the last element produced by + // `inclusive_scan` is the total number of points to be tested against any polyline. + rmm::device_uvector local_point_offsets(num_poly_quad_pairs + 1, stream); + + thrust::inclusive_scan(rmm::exec_policy(stream), + quad_lengths_iter, + quad_lengths_iter + num_poly_quad_pairs, + local_point_offsets.begin() + 1); + + // Ensure local point offsets starts at 0 + local_point_offsets.set_element_async(0, 0, stream); + + // The last element is the total number of points to test against any polyline. + auto num_total_points = local_point_offsets.back_element(stream); + + // Allocate memory for the polyline/point indices and distances + rmm::device_uvector poly_idxs(num_total_points, stream); + rmm::device_uvector point_idxs(num_total_points, stream); + rmm::device_uvector poly_point_distances(num_total_points, stream); + + auto point_poly_indices_and_distances = + make_zip_iterator(point_idxs.begin(), poly_idxs.begin(), poly_point_distances.begin()); + + // Enumerate the point X/Ys using the sorted `point_indices` (from quadtree construction) + auto point_xys_iter = thrust::make_permutation_iterator( + make_zip_iterator(point_x.begin(), point_y.begin()), point_indices.begin()); + + // Compute the combination of point and polyline index pairs. For each polyline/quadrant pair, + // enumerate pairs of (point_index, poly_index) for each point in each quadrant, and calculate + // the minimum distance between each point/poly pair. + // + // In Python pseudocode: + // ``` + // pp_pairs_and_dist = [] + // for polyline, quadrant in pq_pairs: + // for point in quadrant: + // pp_pairs_and_dist.append((point, polyline, min_distance(point, polyline))) + // ``` + // + thrust::transform(rmm::exec_policy(stream), + counting_iter, + counting_iter + num_total_points, + point_poly_indices_and_distances, + compute_point_poly_indices_and_distances{ + point_xys_iter, + quad_offsets_iter, + local_point_offsets.begin(), + local_point_offsets.size() - 1, + *cudf::column_device_view::create(poly_indices, stream), + *cudf::column_device_view::create(poly_offsets, stream), + *cudf::column_device_view::create(poly_points_x, stream), + *cudf::column_device_view::create(poly_points_y, stream)}); + + // sort the point/polyline indices and distances for `reduce_by_key` below + thrust::sort_by_key(rmm::exec_policy(stream), + point_idxs.begin(), + point_idxs.end(), + point_poly_indices_and_distances); + + return std::make_tuple( + std::move(poly_idxs), std::move(point_idxs), std::move(poly_point_distances)); + }(); + + auto &poly_idxs = std::get<0>(poly_point_idxs_and_distances); + auto &point_idxs = std::get<1>(poly_point_idxs_and_distances); + auto &poly_point_distances = std::get<2>(poly_point_idxs_and_distances); + + // Allocate output columns for the point and polyline index pairs and their distances auto point_index_col = make_fixed_width_column(point_x.size(), stream, mr); auto poly_index_col = make_fixed_width_column(point_x.size(), stream, mr); auto distance_col = make_fixed_width_column(point_x.size(), stream, mr); - find_nearest_polyline_kernel<<>>( - d_quad_idx.begin(), - d_poly_idx_offsets.size(), - d_poly_idx_offsets.begin(), - d_poly_idx.size(), - d_poly_idx.begin(), - *cudf::column_device_view::create(quad_lengths, stream), - *cudf::column_device_view::create(quad_offsets, stream), - *cudf::column_device_view::create(point_indices, stream), - *cudf::column_device_view::create(point_x, stream), - *cudf::column_device_view::create(point_y, stream), - *cudf::column_device_view::create(poly_offsets, stream), - *cudf::column_device_view::create(poly_points_x, stream), - *cudf::column_device_view::create(poly_points_y, stream), - *cudf::mutable_column_device_view::create(*point_index_col, stream), - *cudf::mutable_column_device_view::create(*poly_index_col, stream), - *cudf::mutable_column_device_view::create(*distance_col, stream)); + // Fill output distance column with T_MAX because `reduce_by_key` selector is associative + thrust::fill(rmm::exec_policy(stream), + distance_col->mutable_view().template begin(), + distance_col->mutable_view().template end(), + std::numeric_limits::max()); + + // Reduce the intermediate point/poly indices to lists of point/polyline + // index pairs and distances, selecting the polyline index closest for each point. + thrust::reduce_by_key(rmm::exec_policy(stream), + // keys_first + point_idxs.begin(), + // keys_last + point_idxs.end(), + // values_first + make_zip_iterator(poly_idxs.begin(), poly_point_distances.begin()), + // keys_output + point_index_col->mutable_view().begin(), + // values_output + make_zip_iterator(poly_index_col->mutable_view().begin(), + distance_col->mutable_view().template begin()), + // binary_pred + thrust::equal_to(), + // binary_op + [] __device__(auto const &a, auto const &b) { + return thrust::get<1>(a) < thrust::get<1>(b) ? a : b; + }); std::vector> cols{}; cols.reserve(3); @@ -265,7 +267,7 @@ std::unique_ptr quadtree_point_to_nearest_polyline( rmm::mr::device_memory_resource *mr) { CUSPATIAL_EXPECTS(poly_quad_pairs.num_columns() == 2, - "a quadrant-polygon table must have 2 columns"); + "a quadrant-polyline table must have 2 columns"); CUSPATIAL_EXPECTS(quadtree.num_columns() == 5, "a quadtree table must have 5 columns"); CUSPATIAL_EXPECTS(point_indices.size() == point_x.size() && point_x.size() == point_y.size(), "number of points must be the same for both x and y columns"); @@ -274,10 +276,10 @@ std::unique_ptr quadtree_point_to_nearest_polyline( CUSPATIAL_EXPECTS(poly_points_x.size() >= 2 * poly_offsets.size(), "all polylines must have at least two vertices"); CUSPATIAL_EXPECTS(poly_points_x.type() == poly_points_y.type(), - "polygon columns must have the same data type"); + "polyline columns must have the same data type"); CUSPATIAL_EXPECTS(point_x.type() == point_y.type(), "point columns must have the same data type"); CUSPATIAL_EXPECTS(point_x.type() == poly_points_x.type(), - "points and polygons must have the same data type"); + "points and polylines must have the same data type"); if (poly_quad_pairs.num_rows() == 0 || quadtree.num_rows() == 0 || point_indices.size() == 0 || poly_offsets.size() == 0) { diff --git a/cpp/src/utility/point_to_nearest_polyline.cuh b/cpp/src/utility/point_to_nearest_polyline.cuh new file mode 100644 index 000000000..95144072c --- /dev/null +++ b/cpp/src/utility/point_to_nearest_polyline.cuh @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace cuspatial { +namespace detail { + +template +inline __device__ T point_to_poly_line_distance(T const px, + T const py, + cudf::size_type const ring_idx, + cudf::column_device_view const& ring_offsets, + cudf::column_device_view const& poly_points_x, + cudf::column_device_view const& poly_points_y) +{ + T distance_squared = std::numeric_limits::max(); + auto ring_begin = ring_offsets.element(ring_idx); + auto ring_end = ring_idx < ring_offsets.size() - 1 ? ring_offsets.element(ring_idx + 1) + : poly_points_x.size(); + auto ring_len = ring_end - ring_begin; + for (auto point_idx = 0; point_idx < ring_len; ++point_idx) { + auto const i0 = ring_begin + ((point_idx + 0) % ring_len); + auto const i1 = ring_begin + ((point_idx + 1) % ring_len); + auto const x0 = poly_points_x.element(i0); + auto const y0 = poly_points_y.element(i0); + auto const x1 = poly_points_x.element(i1); + auto const y1 = poly_points_y.element(i1); + auto const dx0 = px - x0, dy0 = py - y0; + auto const dx1 = px - x1, dy1 = py - y1; + auto const dx2 = x1 - x0, dy2 = y1 - y0; + auto const d0 = dx0 * dx0 + dy0 * dy0; + auto const d1 = dx1 * dx1 + dy1 * dy1; + auto const d2 = dx2 * dx2 + dy2 * dy2; + auto const d3 = dx2 * dx0 + dy2 * dy0; + auto const r = d3 * d3 / d2; + auto const d = d3 <= 0 || r >= d2 ? min(d0, d1) : d0 - r; + if (d < distance_squared) { distance_squared = d; } + } + + return sqrt(distance_squared); +} + +} // namespace detail +} // namespace cuspatial From ca2e630ce0c93884ef0ecc6aec4a94f6cf090591 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 25 Feb 2021 01:05:11 -0600 Subject: [PATCH 02/22] free intermediate local_point_offsets in quadtree_point_in_polygon --- cpp/src/join/quadtree_point_in_polygon.cu | 118 ++++++++++++---------- 1 file changed, 64 insertions(+), 54 deletions(-) diff --git a/cpp/src/join/quadtree_point_in_polygon.cu b/cpp/src/join/quadtree_point_in_polygon.cu index 4301595c4..816e6f930 100644 --- a/cpp/src/join/quadtree_point_in_polygon.cu +++ b/cpp/src/join/quadtree_point_in_polygon.cu @@ -110,62 +110,72 @@ struct compute_quadtree_point_in_polygon { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { - auto quad_lengths = quadtree.column(3); - auto quad_offsets = quadtree.column(4); - auto poly_indices = poly_quad_pairs.column(0); - auto quad_indices = poly_quad_pairs.column(1); - auto num_poly_quad_pairs = poly_quad_pairs.num_rows(); - - auto counting_iter = thrust::make_counting_iterator(0); - auto quad_lengths_iter = thrust::make_permutation_iterator(quad_lengths.begin(), - quad_indices.begin()); - - auto quad_offsets_iter = thrust::make_permutation_iterator(quad_offsets.begin(), - quad_indices.begin()); - - // Compute a "local" set of zero-based point offsets from number of points in each quadrant - // Use `num_poly_quad_pairs + 1` as the length so that the last element produced by - // `inclusive_scan` is the total number of points to be tested against any polygon. - rmm::device_uvector local_point_offsets(num_poly_quad_pairs + 1, stream); - - thrust::inclusive_scan(rmm::exec_policy(stream), - quad_lengths_iter, - quad_lengths_iter + num_poly_quad_pairs, - local_point_offsets.begin() + 1); - - // Ensure local point offsets starts at 0 - local_point_offsets.set_element_async(0, 0, stream); - - // The last element is the total number of points to test against any polygon. - auto num_total_points = local_point_offsets.back_element(stream); - - // Allocate memory for the polygon and point index pairs - rmm::device_uvector poly_idxs(num_total_points, stream); - rmm::device_uvector point_idxs(num_total_points, stream); - + // Wrapped in an IIFE so `local_point_offsets` is freed on return + auto poly_and_point_idxs = [&]() { + auto quad_lengths = quadtree.column(3); + auto quad_offsets = quadtree.column(4); + auto poly_indices = poly_quad_pairs.column(0); + auto quad_indices = poly_quad_pairs.column(1); + auto num_poly_quad_pairs = poly_quad_pairs.num_rows(); + + auto counting_iter = thrust::make_counting_iterator(0); + auto quad_lengths_iter = thrust::make_permutation_iterator(quad_lengths.begin(), + quad_indices.begin()); + + auto quad_offsets_iter = thrust::make_permutation_iterator(quad_offsets.begin(), + quad_indices.begin()); + + // Compute a "local" set of zero-based point offsets from number of points in each quadrant + // Use `num_poly_quad_pairs + 1` as the length so that the last element produced by + // `inclusive_scan` is the total number of points to be tested against any polygon. + rmm::device_uvector local_point_offsets(num_poly_quad_pairs + 1, stream); + + thrust::inclusive_scan(rmm::exec_policy(stream), + quad_lengths_iter, + quad_lengths_iter + num_poly_quad_pairs, + local_point_offsets.begin() + 1); + + // Ensure local point offsets starts at 0 + local_point_offsets.set_element_async(0, 0, stream); + + // The last element is the total number of points to test against any polygon. + auto num_total_points = local_point_offsets.back_element(stream); + + // Allocate memory for the polygon and point index pairs + rmm::device_uvector poly_idxs(num_total_points, stream); + rmm::device_uvector point_idxs(num_total_points, stream); + + auto poly_and_point_indices = make_zip_iterator(poly_idxs.begin(), point_idxs.begin()); + + // Compute the combination of polygon and point index pairs. For each polygon/quadrant pair, + // enumerate pairs of (poly_index, point_index) for each point in each quadrant. + // + // In Python pseudocode: + // ``` + // pp_pairs = [] + // for polygon, quadrant in pq_pairs: + // for point in quadrant: + // pp_pairs.append((polygon, point)) + // ``` + // + thrust::transform(rmm::exec_policy(stream), + counting_iter, + counting_iter + num_total_points, + poly_and_point_indices, + compute_poly_and_point_indices{ + quad_offsets_iter, + local_point_offsets.begin(), + local_point_offsets.size() - 1, + *cudf::column_device_view::create(poly_indices, stream)}); + + return std::make_tuple(std::move(poly_idxs), std::move(point_idxs), num_total_points); + }(); + + auto &poly_idxs = std::get<0>(poly_and_point_idxs); + auto &point_idxs = std::get<1>(poly_and_point_idxs); + auto &num_total_points = std::get<2>(poly_and_point_idxs); auto poly_and_point_indices = make_zip_iterator(poly_idxs.begin(), point_idxs.begin()); - // Compute the combination of polygon and point index pairs. For each polygon/quadrant pair, - // enumerate pairs of (poly_index, point_index) for each point in each quadrant. - // - // In Python pseudocode: - // ``` - // pp_pairs = [] - // for polygon, quadrant in pq_pairs: - // for point in quadrant: - // pp_pairs.append((polygon, point)) - // ``` - // - thrust::transform(rmm::exec_policy(stream), - counting_iter, - counting_iter + num_total_points, - poly_and_point_indices, - compute_poly_and_point_indices{ - quad_offsets_iter, - local_point_offsets.begin(), - local_point_offsets.size() - 1, - *cudf::column_device_view::create(poly_indices, stream)}); - // Enumerate the point X/Ys using the sorted `point_indices` (from quadtree construction) auto point_xys_iter = thrust::make_permutation_iterator( make_zip_iterator(point_x.begin(), point_y.begin()), point_indices.begin()); From b84306919e3b087bf17c710bc9c8523f507f42f7 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 31 Mar 2021 17:15:01 -0500 Subject: [PATCH 03/22] update for set_element_async API change --- cpp/src/join/quadtree_point_to_nearest_polyline.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/join/quadtree_point_to_nearest_polyline.cu b/cpp/src/join/quadtree_point_to_nearest_polyline.cu index a9fa2c015..b33831a0d 100644 --- a/cpp/src/join/quadtree_point_to_nearest_polyline.cu +++ b/cpp/src/join/quadtree_point_to_nearest_polyline.cu @@ -125,7 +125,8 @@ struct compute_quadtree_point_to_nearest_polyline { local_point_offsets.begin() + 1); // Ensure local point offsets starts at 0 - local_point_offsets.set_element_async(0, 0, stream); + uint32_t init{0}; + local_point_offsets.set_element_async(0, init, stream); // The last element is the total number of points to test against any polyline. auto num_total_points = local_point_offsets.back_element(stream); From e79caa99ff05935f72a44bb36007525e83c09cdb Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 7 Apr 2021 16:18:40 -0500 Subject: [PATCH 04/22] add p2np iterator that yields point/polyline/distances in an order compatible with reduce_by_key --- .../quadtree_point_to_nearest_polyline.cu | 358 ++++++++++++------ 1 file changed, 239 insertions(+), 119 deletions(-) diff --git a/cpp/src/join/quadtree_point_to_nearest_polyline.cu b/cpp/src/join/quadtree_point_to_nearest_polyline.cu index b33831a0d..e3a44c6ba 100644 --- a/cpp/src/join/quadtree_point_to_nearest_polyline.cu +++ b/cpp/src/join/quadtree_point_to_nearest_polyline.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,58 +23,143 @@ #include #include #include +#include #include #include +#include +#include #include #include #include #include #include +#include #include #include #include +#include #include #include +#include #include namespace cuspatial { namespace detail { namespace { -template +inline __device__ thrust::tuple get_quad_poly_and_local_point_indices( + uint32_t const global_index, uint32_t const *point_offsets, uint32_t const *point_offsets_end) +{ + // Calculate the position in "point_offsets" that `global_index` falls between. + // This position is the index of the poly/quad pair for this `global_index`. + // + // Dereferencing `local_point_offset` yields the zero-based first point position of this + // quadrant. Adding this zero-based position to the quadrant's first point position in the + // quadtree yields the "global" position in the `point_indices` map. + auto const local_point_offset = + thrust::upper_bound(thrust::seq, point_offsets, point_offsets_end, global_index) - 1; + return thrust::make_tuple( + // quad_poly_index + thrust::distance(point_offsets, local_point_offset), + // local_point_index + global_index - *local_point_offset); +} + +inline __device__ thrust::tuple get_local_poly_index_and_count( + uint32_t const poly_index, uint32_t const *quad_offsets, uint32_t const *quad_offsets_end) +{ + bool lhs_done{false}; + bool rhs_done{false}; + uint32_t num_polys_in_quad{0}; + auto const l_end = quad_offsets; + auto const r_end = quad_offsets_end - 1; + auto const quad_offset = quad_offsets[poly_index]; + auto l_itr = quad_offsets + poly_index; + auto r_itr = quad_offsets + poly_index; + do { + num_polys_in_quad += !(lhs_done = lhs_done || l_itr <= l_end || *(--l_itr) != quad_offset); + num_polys_in_quad += !(rhs_done = rhs_done || r_itr >= r_end || *(++r_itr) != quad_offset); + } while (!lhs_done || !rhs_done); + auto const local_poly_index = + static_cast(thrust::distance(l_itr, quad_offsets + poly_index)); + return thrust::make_tuple(max(1u, local_poly_index) - 1, num_polys_in_quad + 1); +} + +inline __device__ thrust::tuple get_transposed_point_and_pair_index( + uint32_t const global_index, + uint32_t const *point_offsets, + uint32_t const *point_offsets_end, + uint32_t const *quad_offsets, + uint32_t const *quad_offsets_end, + uint32_t const *quad_lengths) +{ + uint32_t quad_poly_index{0}, local_point_offset{0}; + thrust::tie(quad_poly_index, local_point_offset) = + get_quad_poly_and_local_point_indices(global_index, point_offsets, point_offsets_end); + + uint32_t local_poly_index{0}, num_polys_in_quad{0}; + thrust::tie(local_poly_index, num_polys_in_quad) = + get_local_poly_index_and_count(quad_poly_index, quad_offsets, quad_offsets_end); + + auto const quad_point_offset = quad_offsets[quad_poly_index]; + auto const num_points_in_quad = quad_lengths[quad_poly_index]; + auto const quad_poly_offset = quad_poly_index - local_poly_index; + auto const quad_poly_point_start = local_poly_index * num_points_in_quad; + auto const transposed_point_start = quad_poly_point_start + local_point_offset; + + return thrust::make_tuple( + // transposed point index + (transposed_point_start / num_polys_in_quad) + quad_point_offset, + // transposed polyline index + (transposed_point_start % num_polys_in_quad) + quad_poly_offset); +} + +struct compute_point_indices { + uint32_t const *point_offsets; + uint32_t const *point_offsets_end; + uint32_t const *quad_offsets; + uint32_t const *quad_offsets_end; + uint32_t const *quad_lengths; + uint32_t __device__ operator()(uint32_t const global_index) + { + return thrust::get<0>(get_transposed_point_and_pair_index(global_index, + point_offsets, + point_offsets_end, + quad_offsets, + quad_offsets_end, + quad_lengths)); + } +}; + +template struct compute_point_poly_indices_and_distances { PointIter points; - QuadOffsetsIter quad_point_offsets; - uint32_t const *local_point_offsets; - size_t const num_local_point_offsets; - cudf::column_device_view const poly_indices; + uint32_t const *point_offsets; + uint32_t const *point_offsets_end; + uint32_t const *quad_offsets; + uint32_t const *quad_offsets_end; + uint32_t const *quad_lengths; + uint32_t const *poly_indices; cudf::column_device_view const poly_offsets; cudf::column_device_view const poly_points_x; cudf::column_device_view const poly_points_y; - thrust::tuple __device__ operator()(cudf::size_type const i) + thrust::tuple __device__ operator()(uint32_t const global_index) { - // Calculate the position in "local_point_offsets" that `i` falls between. - // This position is the index of the poly/quad pair for this `i`. - // - // Dereferencing `local_point_offset` yields the zero-based first point position of this - // quadrant. Adding this zero-based position to the quadrant's first point position in the - // quadtree yields the "global" position in the `point_indices` map. - auto po_begin = local_point_offsets; - auto po_end = local_point_offsets + num_local_point_offsets; - auto const local_point_offset = thrust::upper_bound(thrust::seq, po_begin, po_end, i) - 1; - uint32_t const pairs_idx = thrust::distance(local_point_offsets, local_point_offset); - uint32_t const point_idx = quad_point_offsets[pairs_idx] + (i - *local_point_offset); - uint32_t const poly_idx = poly_indices.element(pairs_idx); - auto const &point = points[point_idx]; - auto const distance = point_to_poly_line_distance(thrust::get<0>(point), - thrust::get<1>(point), + // (point_index, poly_index) + auto const ids = get_transposed_point_and_pair_index( + global_index, point_offsets, point_offsets_end, quad_offsets, quad_offsets_end, quad_lengths); + + auto const point_xy = points[thrust::get<0>(ids)]; + auto const poly_idx = poly_indices[thrust::get<1>(ids)]; + auto const distance = point_to_poly_line_distance(thrust::get<0>(point_xy), // x + thrust::get<1>(point_xy), // y poly_idx, poly_offsets, poly_points_x, poly_points_y); - return thrust::make_tuple(point_idx, poly_idx, distance); + return thrust::make_tuple(poly_idx, distance); } }; @@ -99,91 +184,128 @@ struct compute_quadtree_point_to_nearest_polyline { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { - // Wrapped in an IIFE so `local_point_offsets` is freed on return - auto poly_point_idxs_and_distances = [&]() { - auto quad_lengths = quadtree.column(3); - auto quad_offsets = quadtree.column(4); - auto poly_indices = poly_quad_pairs.column(0); - auto quad_indices = poly_quad_pairs.column(1); - auto num_poly_quad_pairs = poly_quad_pairs.num_rows(); - - auto counting_iter = thrust::make_counting_iterator(0); - auto quad_lengths_iter = thrust::make_permutation_iterator(quad_lengths.begin(), - quad_indices.begin()); - - auto quad_offsets_iter = thrust::make_permutation_iterator(quad_offsets.begin(), - quad_indices.begin()); - - // Compute a "local" set of zero-based point offsets from number of points in each quadrant - // Use `num_poly_quad_pairs + 1` as the length so that the last element produced by - // `inclusive_scan` is the total number of points to be tested against any polyline. - rmm::device_uvector local_point_offsets(num_poly_quad_pairs + 1, stream); - - thrust::inclusive_scan(rmm::exec_policy(stream), - quad_lengths_iter, - quad_lengths_iter + num_poly_quad_pairs, - local_point_offsets.begin() + 1); - - // Ensure local point offsets starts at 0 - uint32_t init{0}; - local_point_offsets.set_element_async(0, init, stream); - - // The last element is the total number of points to test against any polyline. - auto num_total_points = local_point_offsets.back_element(stream); - - // Allocate memory for the polyline/point indices and distances - rmm::device_uvector poly_idxs(num_total_points, stream); - rmm::device_uvector point_idxs(num_total_points, stream); - rmm::device_uvector poly_point_distances(num_total_points, stream); - - auto point_poly_indices_and_distances = - make_zip_iterator(point_idxs.begin(), poly_idxs.begin(), poly_point_distances.begin()); - - // Enumerate the point X/Ys using the sorted `point_indices` (from quadtree construction) - auto point_xys_iter = thrust::make_permutation_iterator( - make_zip_iterator(point_x.begin(), point_y.begin()), point_indices.begin()); - - // Compute the combination of point and polyline index pairs. For each polyline/quadrant pair, - // enumerate pairs of (point_index, poly_index) for each point in each quadrant, and calculate - // the minimum distance between each point/poly pair. - // - // In Python pseudocode: - // ``` - // pp_pairs_and_dist = [] - // for polyline, quadrant in pq_pairs: - // for point in quadrant: - // pp_pairs_and_dist.append((point, polyline, min_distance(point, polyline))) - // ``` - // - thrust::transform(rmm::exec_policy(stream), - counting_iter, - counting_iter + num_total_points, - point_poly_indices_and_distances, - compute_point_poly_indices_and_distances{ - point_xys_iter, - quad_offsets_iter, - local_point_offsets.begin(), - local_point_offsets.size() - 1, - *cudf::column_device_view::create(poly_indices, stream), - *cudf::column_device_view::create(poly_offsets, stream), - *cudf::column_device_view::create(poly_points_x, stream), - *cudf::column_device_view::create(poly_points_y, stream)}); - - // sort the point/polyline indices and distances for `reduce_by_key` below - thrust::sort_by_key(rmm::exec_policy(stream), - point_idxs.begin(), - point_idxs.end(), - point_poly_indices_and_distances); - - return std::make_tuple( - std::move(poly_idxs), std::move(point_idxs), std::move(poly_point_distances)); - }(); - - auto &poly_idxs = std::get<0>(poly_point_idxs_and_distances); - auto &point_idxs = std::get<1>(poly_point_idxs_and_distances); - auto &poly_point_distances = std::get<2>(poly_point_idxs_and_distances); + auto quad_lengths = quadtree.column(3); + auto quad_offsets = quadtree.column(4); + auto num_poly_quad_pairs = poly_quad_pairs.num_rows(); + + rmm::device_uvector quad_poly_indices(num_poly_quad_pairs, stream); + rmm::device_uvector quad_point_offsets(num_poly_quad_pairs, stream); + rmm::device_uvector quad_point_lengths(num_poly_quad_pairs, stream); + + auto quad_poly_indices_and_quad_offsets_lengths = make_zip_iterator( + // poly_indices + poly_quad_pairs.column(0).begin(), + // quad_offsets + thrust::make_permutation_iterator(quadtree.column(4).begin(), + poly_quad_pairs.column(1).begin()), + // quad_lengths + thrust::make_permutation_iterator(quadtree.column(3).begin(), + poly_quad_pairs.column(1).begin())); + + // Copy the quad/polyline indices into local vectors instead of sorting the input columns + thrust::copy( + rmm::exec_policy(stream), + quad_poly_indices_and_quad_offsets_lengths, + quad_poly_indices_and_quad_offsets_lengths + num_poly_quad_pairs, + make_zip_iterator( + quad_poly_indices.begin(), quad_point_offsets.begin(), quad_point_lengths.begin())); + + // Sort the quad/polyline index pairs by quadrant so that the `point_indices` are + // produced/reduced in ascending order below. + thrust::stable_sort_by_key( + rmm::exec_policy(stream), + quad_point_offsets.begin(), + quad_point_offsets.end(), + make_zip_iterator(quad_point_lengths.begin(), quad_poly_indices.begin())); + + // Compute a "local" set of zero-based point offsets from number of points in each quadrant + // Use `num_poly_quad_pairs + 1` as the length so that the last element produced by + // `inclusive_scan` is the total number of points to be tested against any polyline. + rmm::device_uvector local_point_offsets(num_poly_quad_pairs + 1, stream); + + // Ensure local point offsets starts at 0 + uint32_t init{0}; + local_point_offsets.set_element_async(0, init, stream); + + thrust::inclusive_scan(rmm::exec_policy(stream), + quad_point_lengths.begin(), + quad_point_lengths.end(), + local_point_offsets.begin() + 1); + + // The last element is the total number of points to test against any polyline. + auto num_point_poly_pairs = local_point_offsets.back_element(stream); + + // Enumerate the point X/Ys using the sorted `point_indices` (from quadtree construction) + auto point_xys_iter = thrust::make_permutation_iterator( + make_zip_iterator(point_x.begin(), point_y.begin()), point_indices.begin()); + + // + // Compute the combination of point and polyline index pairs. For each polyline/quadrant pair, + // enumerate pairs of (point_index, polyline_index) for each point in each quadrant, and + // calculate the minimum distance between each point/poly pair. + // + // In Python pseudocode: + // ``` + // pp_pairs_and_dist = [] + // for polyline, quadrant in pq_pairs: + // for point in quadrant: + // pp_pairs_and_dist.append((point, polyline, min_distance(point, polyline))) + // ``` + // + // However, the above psuedocode produces values in an order such that the distance + // from a point to each polyline cannot be reduced with `thrust::reduce_by_key`: + // ``` + // point | polyline | distance + // 0 | 0 | 10.0 + // 1 | 0 | 30.0 + // 2 | 0 | 20.0 + // 0 | 1 | 30.0 + // 1 | 1 | 20.0 + // 2 | 1 | 10.0 + // ``` + // + // In order to use `thrust::reduce_by_key` to compute the minimum distance from a point to + // the polylines in its quadrant, the above table needs to be sorted by `point` instead of + // `polyline`: + // ``` + // point | polyline | distance + // 0 | 0 | 10.0 + // 0 | 1 | 30.0 + // 1 | 0 | 30.0 + // 1 | 1 | 20.0 + // 2 | 0 | 20.0 + // 2 | 1 | 10.0 + // ``` + // + // A naive approach would be to allocate memory for the above three columns, sort the + // columns by `point`, then use `thrust::reduce_by_key` to compute the min distances. + // + // The sizes of the intermediate buffers required can easily grow beyond available + // device memory, so a better approach is to use a Thrust iterator to yield values + // in the sorted order on demand instead, which is what we're doing here. + // + + auto all_pairs_point_indices = + thrust::make_transform_iterator(thrust::make_counting_iterator(0u), + compute_point_indices{local_point_offsets.begin(), + local_point_offsets.end(), + quad_point_offsets.begin(), + quad_point_offsets.end(), + quad_point_lengths.begin()}); + + auto all_pairs_poly_indices_and_distances = thrust::make_transform_iterator( + thrust::make_counting_iterator(0u), + compute_point_poly_indices_and_distances{ + point_xys_iter, + local_point_offsets.begin(), + local_point_offsets.end(), + quad_point_offsets.begin(), + quad_point_offsets.end(), + quad_point_lengths.begin(), + quad_poly_indices.begin(), + *cudf::column_device_view::create(poly_offsets, stream), + *cudf::column_device_view::create(poly_points_x, stream), + *cudf::column_device_view::create(poly_points_y, stream)}); // Allocate output columns for the point and polyline index pairs and their distances auto point_index_col = make_fixed_width_column(point_x.size(), stream, mr); @@ -197,22 +319,20 @@ struct compute_quadtree_point_to_nearest_polyline { std::numeric_limits::max()); // Reduce the intermediate point/poly indices to lists of point/polyline - // index pairs and distances, selecting the polyline index closest for each point. + // index pairs and distances, selecting the polyline index closest to each point. thrust::reduce_by_key(rmm::exec_policy(stream), - // keys_first - point_idxs.begin(), - // keys_last - point_idxs.end(), - // values_first - make_zip_iterator(poly_idxs.begin(), poly_point_distances.begin()), - // keys_output + // point indices in + all_pairs_point_indices, + all_pairs_point_indices + num_point_poly_pairs, + all_pairs_poly_indices_and_distances, + // point indices out point_index_col->mutable_view().begin(), - // values_output + // polyline indices and distances out make_zip_iterator(poly_index_col->mutable_view().begin(), distance_col->mutable_view().template begin()), - // binary_pred + // point index comparator thrust::equal_to(), - // binary_op + // binop to select the point/polyline pair with the smallest distance [] __device__(auto const &a, auto const &b) { return thrust::get<1>(a) < thrust::get<1>(b) ? a : b; }); From e07483b1df41a32752409b7d3a30d5010ad457b9 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 7 Apr 2021 16:28:05 -0500 Subject: [PATCH 05/22] clean up p2np includes --- .../join/quadtree_point_to_nearest_polyline.cu | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/cpp/src/join/quadtree_point_to_nearest_polyline.cu b/cpp/src/join/quadtree_point_to_nearest_polyline.cu index e3a44c6ba..5ce49c940 100644 --- a/cpp/src/join/quadtree_point_to_nearest_polyline.cu +++ b/cpp/src/join/quadtree_point_to_nearest_polyline.cu @@ -23,27 +23,16 @@ #include #include #include -#include #include #include -#include -#include #include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include + +#include namespace cuspatial { namespace detail { From c0617595143b5ab7bbf41e1132b90fe91977daf5 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 7 Apr 2021 22:10:06 -0500 Subject: [PATCH 06/22] use thrust binary searches instead of linear search --- .../quadtree_point_to_nearest_polyline.cu | 96 ++++++++----------- cpp/src/utility/point_to_nearest_polyline.cuh | 16 ++-- 2 files changed, 47 insertions(+), 65 deletions(-) diff --git a/cpp/src/join/quadtree_point_to_nearest_polyline.cu b/cpp/src/join/quadtree_point_to_nearest_polyline.cu index 5ce49c940..7f8212355 100644 --- a/cpp/src/join/quadtree_point_to_nearest_polyline.cu +++ b/cpp/src/join/quadtree_point_to_nearest_polyline.cu @@ -31,6 +31,8 @@ #include #include +#include +#include #include @@ -59,21 +61,19 @@ inline __device__ thrust::tuple get_quad_poly_and_local_poin inline __device__ thrust::tuple get_local_poly_index_and_count( uint32_t const poly_index, uint32_t const *quad_offsets, uint32_t const *quad_offsets_end) { - bool lhs_done{false}; - bool rhs_done{false}; - uint32_t num_polys_in_quad{0}; - auto const l_end = quad_offsets; - auto const r_end = quad_offsets_end - 1; + auto const lhs_end = quad_offsets; + auto const rhs_end = quad_offsets_end; auto const quad_offset = quad_offsets[poly_index]; - auto l_itr = quad_offsets + poly_index; - auto r_itr = quad_offsets + poly_index; - do { - num_polys_in_quad += !(lhs_done = lhs_done || l_itr <= l_end || *(--l_itr) != quad_offset); - num_polys_in_quad += !(rhs_done = rhs_done || r_itr >= r_end || *(++r_itr) != quad_offset); - } while (!lhs_done || !rhs_done); - auto const local_poly_index = - static_cast(thrust::distance(l_itr, quad_offsets + poly_index)); - return thrust::make_tuple(max(1u, local_poly_index) - 1, num_polys_in_quad + 1); + auto const lhs = + thrust::lower_bound(thrust::seq, lhs_end, quad_offsets + poly_index, quad_offset); + auto const rhs = + thrust::upper_bound(thrust::seq, quad_offsets + poly_index, rhs_end, quad_offset); + + return thrust::make_tuple( + // local_poly_index + static_cast(thrust::distance(lhs, quad_offsets + poly_index)), + // num_polys_in_quad + static_cast(thrust::distance(lhs, rhs))); } inline __device__ thrust::tuple get_transposed_point_and_pair_index( @@ -105,23 +105,6 @@ inline __device__ thrust::tuple get_transposed_point_and_pai (transposed_point_start % num_polys_in_quad) + quad_poly_offset); } -struct compute_point_indices { - uint32_t const *point_offsets; - uint32_t const *point_offsets_end; - uint32_t const *quad_offsets; - uint32_t const *quad_offsets_end; - uint32_t const *quad_lengths; - uint32_t __device__ operator()(uint32_t const global_index) - { - return thrust::get<0>(get_transposed_point_and_pair_index(global_index, - point_offsets, - point_offsets_end, - quad_offsets, - quad_offsets_end, - quad_lengths)); - } -}; - template struct compute_point_poly_indices_and_distances { PointIter points; @@ -134,21 +117,22 @@ struct compute_point_poly_indices_and_distances { cudf::column_device_view const poly_offsets; cudf::column_device_view const poly_points_x; cudf::column_device_view const poly_points_y; - thrust::tuple __device__ operator()(uint32_t const global_index) + inline __device__ thrust::tuple operator()(uint32_t const global_index) { // (point_index, poly_index) auto const ids = get_transposed_point_and_pair_index( global_index, point_offsets, point_offsets_end, quad_offsets, quad_offsets_end, quad_lengths); - auto const point_xy = points[thrust::get<0>(ids)]; + auto const point_id = thrust::get<0>(ids); + auto const point_xy = points[point_id]; auto const poly_idx = poly_indices[thrust::get<1>(ids)]; - auto const distance = point_to_poly_line_distance(thrust::get<0>(point_xy), // x - thrust::get<1>(point_xy), // y - poly_idx, - poly_offsets, - poly_points_x, - poly_points_y); - return thrust::make_tuple(poly_idx, distance); + auto const distance = point_to_poly_line_distance(thrust::get<0>(point_xy), // x + thrust::get<1>(point_xy), // y + poly_idx, + poly_offsets, + poly_points_x, + poly_points_y); + return thrust::make_tuple(point_id, poly_idx, distance); } }; @@ -274,15 +258,7 @@ struct compute_quadtree_point_to_nearest_polyline { // in the sorted order on demand instead, which is what we're doing here. // - auto all_pairs_point_indices = - thrust::make_transform_iterator(thrust::make_counting_iterator(0u), - compute_point_indices{local_point_offsets.begin(), - local_point_offsets.end(), - quad_point_offsets.begin(), - quad_point_offsets.end(), - quad_point_lengths.begin()}); - - auto all_pairs_poly_indices_and_distances = thrust::make_transform_iterator( + auto all_point_poly_indices_and_distances = thrust::make_transform_iterator( thrust::make_counting_iterator(0u), compute_point_poly_indices_and_distances{ point_xys_iter, @@ -296,6 +272,10 @@ struct compute_quadtree_point_to_nearest_polyline { *cudf::column_device_view::create(poly_points_x, stream), *cudf::column_device_view::create(poly_points_y, stream)}); + auto all_point_indices = + thrust::make_transform_iterator(all_point_poly_indices_and_distances, + [] __device__(auto const &x) { return thrust::get<0>(x); }); + // Allocate output columns for the point and polyline index pairs and their distances auto point_index_col = make_fixed_width_column(point_x.size(), stream, mr); auto poly_index_col = make_fixed_width_column(point_x.size(), stream, mr); @@ -311,19 +291,21 @@ struct compute_quadtree_point_to_nearest_polyline { // index pairs and distances, selecting the polyline index closest to each point. thrust::reduce_by_key(rmm::exec_policy(stream), // point indices in - all_pairs_point_indices, - all_pairs_point_indices + num_point_poly_pairs, - all_pairs_poly_indices_and_distances, + all_point_indices, + all_point_indices + num_point_poly_pairs, + all_point_poly_indices_and_distances, // point indices out - point_index_col->mutable_view().begin(), - // polyline indices and distances out - make_zip_iterator(poly_index_col->mutable_view().begin(), + thrust::make_discard_iterator(), + // point_index_col->mutable_view().begin(), + // point/polyline indices and distances out + make_zip_iterator(point_index_col->mutable_view().begin(), + poly_index_col->mutable_view().begin(), distance_col->mutable_view().template begin()), - // point index comparator + // comparator thrust::equal_to(), // binop to select the point/polyline pair with the smallest distance [] __device__(auto const &a, auto const &b) { - return thrust::get<1>(a) < thrust::get<1>(b) ? a : b; + return thrust::get<2>(a) < thrust::get<2>(b) ? a : b; }); std::vector> cols{}; diff --git a/cpp/src/utility/point_to_nearest_polyline.cuh b/cpp/src/utility/point_to_nearest_polyline.cuh index 95144072c..fc9de799c 100644 --- a/cpp/src/utility/point_to_nearest_polyline.cuh +++ b/cpp/src/utility/point_to_nearest_polyline.cuh @@ -35,7 +35,7 @@ inline __device__ T point_to_poly_line_distance(T const px, auto ring_end = ring_idx < ring_offsets.size() - 1 ? ring_offsets.element(ring_idx + 1) : poly_points_x.size(); auto ring_len = ring_end - ring_begin; - for (auto point_idx = 0; point_idx < ring_len; ++point_idx) { + for (auto point_idx = 0u; point_idx < ring_len; ++point_idx) { auto const i0 = ring_begin + ((point_idx + 0) % ring_len); auto const i1 = ring_begin + ((point_idx + 1) % ring_len); auto const x0 = poly_points_x.element(i0); @@ -45,13 +45,13 @@ inline __device__ T point_to_poly_line_distance(T const px, auto const dx0 = px - x0, dy0 = py - y0; auto const dx1 = px - x1, dy1 = py - y1; auto const dx2 = x1 - x0, dy2 = y1 - y0; - auto const d0 = dx0 * dx0 + dy0 * dy0; - auto const d1 = dx1 * dx1 + dy1 * dy1; - auto const d2 = dx2 * dx2 + dy2 * dy2; - auto const d3 = dx2 * dx0 + dy2 * dy0; - auto const r = d3 * d3 / d2; - auto const d = d3 <= 0 || r >= d2 ? min(d0, d1) : d0 - r; - if (d < distance_squared) { distance_squared = d; } + auto const d0 = dx0 * dx0 + dy0 * dy0; + auto const d1 = dx1 * dx1 + dy1 * dy1; + auto const d2 = dx2 * dx2 + dy2 * dy2; + auto const d3 = dx2 * dx0 + dy2 * dy0; + auto const r = d3 * d3 / d2; + auto const d = d3 <= 0 || r >= d2 ? min(d0, d1) : d0 - r; + distance_squared = min(distance_squared, d); } return sqrt(distance_squared); From 1fa3514531d9e3085dfe524cab01036abc98b4ab Mon Sep 17 00:00:00 2001 From: ptaylor Date: Fri, 9 Apr 2021 12:10:11 -0500 Subject: [PATCH 07/22] fix GCC 9 RVO warning/error --- cpp/tests/join/point_in_polygon_test_large.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/join/point_in_polygon_test_large.cpp b/cpp/tests/join/point_in_polygon_test_large.cpp index a9a785c1c..8f9a91f9f 100644 --- a/cpp/tests/join/point_in_polygon_test_large.cpp +++ b/cpp/tests/join/point_in_polygon_test_large.cpp @@ -96,7 +96,7 @@ inline auto make_polygons_geometry(thrust::host_vector const &poly_off } polygons.push_back(polygon); } - return std::move(polygons); + return polygons; } template From cc1cb966389553343b0a8d7a4997f83224324389 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Fri, 9 Apr 2021 12:10:53 -0500 Subject: [PATCH 08/22] use new Thrust 1.12.0 make_zip_iterator instead of our custom helper --- .../indexing/construction/detail/phase_1.cuh | 8 ++++---- .../indexing/construction/detail/phase_2.cuh | 16 ++++++++-------- .../indexing/construction/detail/utilities.cuh | 12 ------------ cpp/src/indexing/construction/point_quadtree.cu | 12 ++++++------ cpp/src/join/detail/intersection.cuh | 5 +++-- cpp/src/join/detail/traversal.cuh | 17 +++++++++-------- cpp/src/join/quadtree_point_in_polygon.cu | 6 ++++-- .../join/quadtree_point_to_nearest_polyline.cu | 1 + cpp/src/join/quadtree_poly_filtering.cu | 6 ++++-- 9 files changed, 39 insertions(+), 44 deletions(-) diff --git a/cpp/src/indexing/construction/detail/phase_1.cuh b/cpp/src/indexing/construction/detail/phase_1.cuh index 00be5e594..71413156e 100644 --- a/cpp/src/indexing/construction/detail/phase_1.cuh +++ b/cpp/src/indexing/construction/detail/phase_1.cuh @@ -65,8 +65,8 @@ compute_point_keys_and_sorted_indices(cudf::column_view const &x, { rmm::device_uvector keys(x.size(), stream); thrust::transform(rmm::exec_policy(stream), - make_zip_iterator(x.begin(), y.begin()), - make_zip_iterator(x.begin(), y.begin()) + x.size(), + thrust::make_zip_iterator(x.begin(), y.begin()), + thrust::make_zip_iterator(x.begin(), y.begin()) + x.size(), keys.begin(), [=] __device__(auto const &point) { T x, y; @@ -147,11 +147,11 @@ build_tree_levels(int8_t max_depth, keys_begin, [] __device__(uint32_t const child_key) { return (child_key >> 2); }); // iterator for the current level's quad node point and child counts - auto child_nodes = make_zip_iterator(quad_point_count_begin, quad_child_count_begin); + auto child_nodes = thrust::make_zip_iterator(quad_point_count_begin, quad_child_count_begin); // iterator for the current level's initial values auto child_values = - make_zip_iterator(quad_point_count_begin, thrust::make_constant_iterator(1)); + thrust::make_zip_iterator(quad_point_count_begin, thrust::make_constant_iterator(1)); for (cudf::size_type level = max_depth - 1; level >= 0; --level) { auto num_full_quads = build_tree_level(parent_keys + begin, diff --git a/cpp/src/indexing/construction/detail/phase_2.cuh b/cpp/src/indexing/construction/detail/phase_2.cuh index 62002e44a..c18350ce6 100644 --- a/cpp/src/indexing/construction/detail/phase_2.cuh +++ b/cpp/src/indexing/construction/detail/phase_2.cuh @@ -70,7 +70,7 @@ inline rmm::device_uvector flatten_point_keys( { rmm::device_uvector flattened_keys(num_valid_nodes, stream); auto keys_and_levels = - make_zip_iterator(quad_keys.begin(), quad_level.begin(), indicator.begin()); + thrust::make_zip_iterator(quad_keys.begin(), quad_level.begin(), indicator.begin()); thrust::transform(rmm::exec_policy(stream), keys_and_levels, keys_and_levels + num_valid_nodes, @@ -122,7 +122,7 @@ inline rmm::device_uvector compute_flattened_first_point_positions( rmm::exec_policy(stream), flattened_keys.begin(), flattened_keys.end(), - make_zip_iterator(initial_sort_indices.begin(), quad_point_count_tmp.begin())); + thrust::make_zip_iterator(initial_sort_indices.begin(), quad_point_count_tmp.begin())); thrust::remove_if(rmm::exec_policy(stream), quad_point_count_tmp.begin(), @@ -157,7 +157,7 @@ inline rmm::device_uvector compute_flattened_first_point_positions( quad_point_offsets_tmp.begin()); auto counts_and_offsets = - make_zip_iterator(quad_point_count_tmp.begin(), quad_point_offsets_tmp.begin()); + thrust::make_zip_iterator(quad_point_count_tmp.begin(), quad_point_offsets_tmp.begin()); thrust::stable_sort_by_key(rmm::exec_policy(stream), initial_sort_indices.begin(), @@ -170,7 +170,7 @@ inline rmm::device_uvector compute_flattened_first_point_positions( counts_and_offsets, counts_and_offsets + leaf_offsets.size(), leaf_offsets.begin(), - make_zip_iterator(quad_point_count.begin(), quad_point_offsets.begin())); + thrust::make_zip_iterator(quad_point_count.begin(), quad_point_offsets.begin())); quad_point_offsets.shrink_to_fit(stream); @@ -261,10 +261,10 @@ inline std::pair remove_unqualified_quads( // Start counting nodes at level 2, since children of the root node should not // be discarded. // line 5 of algorithm in Fig. 5 in ref. - auto tree = make_zip_iterator(quad_keys.begin() + level_1_size, - quad_point_count.begin() + level_1_size, - quad_child_count.begin() + level_1_size, - quad_levels.begin() + level_1_size); + auto tree = thrust::make_zip_iterator(quad_keys.begin() + level_1_size, + quad_point_count.begin() + level_1_size, + quad_child_count.begin() + level_1_size, + quad_levels.begin() + level_1_size); auto last_valid = thrust::remove_if(rmm::exec_policy(stream), diff --git a/cpp/src/indexing/construction/detail/utilities.cuh b/cpp/src/indexing/construction/detail/utilities.cuh index b41c7a1a7..e1bb2151a 100644 --- a/cpp/src/indexing/construction/detail/utilities.cuh +++ b/cpp/src/indexing/construction/detail/utilities.cuh @@ -18,26 +18,14 @@ #include #include -#include -#include #include -#include #include namespace cuspatial { namespace detail { -/** - * @brief Helper function to reduce verbosity creating thrust zip iterators - */ -template -inline auto make_zip_iterator(Ts... its) -{ - return thrust::make_zip_iterator(thrust::make_tuple(std::forward(its)...)); -} - template struct tuple_sum { inline __device__ thrust::tuple operator()(thrust::tuple const& a, diff --git a/cpp/src/indexing/construction/point_quadtree.cu b/cpp/src/indexing/construction/point_quadtree.cu index 5bf86c2b0..f822578eb 100644 --- a/cpp/src/indexing/construction/point_quadtree.cu +++ b/cpp/src/indexing/construction/point_quadtree.cu @@ -107,9 +107,9 @@ inline std::unique_ptr make_quad_tree(rmm::device_uvector level_1_size); auto &offsets = quad_child_pos; - auto offsets_iter = make_zip_iterator(is_quad->view().begin(), - quad_child_pos->view().template begin(), - quad_point_pos.begin()); + auto offsets_iter = thrust::make_zip_iterator(is_quad->view().begin(), + quad_child_pos->view().template begin(), + quad_point_pos.begin()); // for each value in `is_quad` copy from `quad_child_pos` if true, else // `quad_point_pos` @@ -129,9 +129,9 @@ inline std::unique_ptr make_quad_tree(rmm::device_uvector auto lengths = make_fixed_width_column(num_valid_nodes, stream, mr); // for each value in `is_quad` copy from `quad_child_count` if true, else // `quad_point_count` - auto lengths_iter = make_zip_iterator(is_quad->view().begin(), // - quad_child_count.begin(), - quad_point_count.begin()); + auto lengths_iter = thrust::make_zip_iterator(is_quad->view().begin(), // + quad_child_count.begin(), + quad_point_count.begin()); thrust::transform(rmm::exec_policy(stream), lengths_iter, lengths_iter + num_valid_nodes, diff --git a/cpp/src/join/detail/intersection.cuh b/cpp/src/join/detail/intersection.cuh index efdac370a..7ce6efe61 100644 --- a/cpp/src/join/detail/intersection.cuh +++ b/cpp/src/join/detail/intersection.cuh @@ -29,6 +29,7 @@ #include #include +#include #include #include @@ -97,8 +98,8 @@ inline std::pair find_intersections( auto d_poly_y_max = cudf::column_device_view::create(poly_bbox.column(3), stream); thrust::transform(rmm::exec_policy(stream), - make_zip_iterator(node_indices, poly_indices), - make_zip_iterator(node_indices, poly_indices) + num_pairs, + thrust::make_zip_iterator(node_indices, poly_indices), + thrust::make_zip_iterator(node_indices, poly_indices) + num_pairs, node_pairs, [x_min, y_min, diff --git a/cpp/src/join/detail/traversal.cuh b/cpp/src/join/detail/traversal.cuh index e984f955c..adef3309e 100644 --- a/cpp/src/join/detail/traversal.cuh +++ b/cpp/src/join/detail/traversal.cuh @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -106,15 +107,15 @@ descend_quadtree(LengthsIter counts, parent_indices.begin(), parent_indices.begin() + num_children, // curr level iterator - make_zip_iterator(parent_types.begin(), - parent_levels.begin(), - parent_node_indices.begin(), - parent_poly_indices.begin()), + thrust::make_zip_iterator(parent_types.begin(), + parent_levels.begin(), + parent_node_indices.begin(), + parent_poly_indices.begin()), // next level iterator - make_zip_iterator(child_types.begin(), - child_levels.begin(), - child_quad_indices.begin(), - child_poly_indices.begin())); + thrust::make_zip_iterator(child_types.begin(), + child_levels.begin(), + child_quad_indices.begin(), + child_poly_indices.begin())); rmm::device_uvector relative_child_offsets(num_children, stream); thrust::exclusive_scan_by_key(rmm::exec_policy(stream), diff --git a/cpp/src/join/quadtree_point_in_polygon.cu b/cpp/src/join/quadtree_point_in_polygon.cu index a232610dd..601f2d6f7 100644 --- a/cpp/src/join/quadtree_point_in_polygon.cu +++ b/cpp/src/join/quadtree_point_in_polygon.cu @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -144,7 +145,7 @@ struct compute_quadtree_point_in_polygon { rmm::device_uvector poly_idxs(num_total_points, stream); rmm::device_uvector point_idxs(num_total_points, stream); - auto poly_and_point_indices = make_zip_iterator(poly_idxs.begin(), point_idxs.begin()); + auto poly_and_point_indices = thrust::make_zip_iterator(poly_idxs.begin(), point_idxs.begin()); // Compute the combination of polygon and point index pairs. For each polygon/quadrant pair, // enumerate pairs of (poly_index, point_index) for each point in each quadrant. @@ -169,7 +170,8 @@ struct compute_quadtree_point_in_polygon { // Enumerate the point X/Ys using the sorted `point_indices` (from quadtree construction) auto point_xys_iter = thrust::make_permutation_iterator( - make_zip_iterator(point_x.begin(), point_y.begin()), point_indices.begin()); + thrust::make_zip_iterator(point_x.begin(), point_y.begin()), + point_indices.begin()); // Compute the number of intersections by removing (poly, point) pairs that don't intersect auto num_intersections = thrust::distance( diff --git a/cpp/src/join/quadtree_point_to_nearest_polyline.cu b/cpp/src/join/quadtree_point_to_nearest_polyline.cu index 8d9c025b9..18b45464c 100644 --- a/cpp/src/join/quadtree_point_to_nearest_polyline.cu +++ b/cpp/src/join/quadtree_point_to_nearest_polyline.cu @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/join/quadtree_poly_filtering.cu b/cpp/src/join/quadtree_poly_filtering.cu index 74a86a662..2e3c261cf 100644 --- a/cpp/src/join/quadtree_poly_filtering.cu +++ b/cpp/src/join/quadtree_poly_filtering.cu @@ -30,6 +30,8 @@ #include #include +#include + #include namespace cuspatial { @@ -89,13 +91,13 @@ inline std::unique_ptr join_quadtree_and_bboxes(cudf::table_view co cudf::size_type num_parents{0}; auto make_current_level_iter = [&]() { - return make_zip_iterator( + return thrust::make_zip_iterator( cur_types.begin(), cur_levels.begin(), cur_node_idxs.begin(), cur_poly_idxs.begin()); }; auto make_output_values_iter = [&]() { return num_results + - make_zip_iterator( + thrust::make_zip_iterator( out_types.begin(), out_levels.begin(), out_node_idxs.begin(), out_poly_idxs.begin()); }; From 5a33ca72eacd27328288461a09395b40d6ac762a Mon Sep 17 00:00:00 2001 From: ptaylor Date: Fri, 9 Apr 2021 13:40:53 -0500 Subject: [PATCH 09/22] update copyrights --- cpp/src/indexing/construction/detail/phase_1.cuh | 2 +- cpp/src/indexing/construction/detail/phase_2.cuh | 2 +- cpp/src/indexing/construction/detail/utilities.cuh | 2 +- cpp/src/indexing/construction/point_quadtree.cu | 2 +- cpp/src/join/detail/intersection.cuh | 2 +- cpp/src/join/detail/traversal.cuh | 2 +- cpp/src/join/quadtree_point_in_polygon.cu | 2 +- cpp/src/join/quadtree_point_to_nearest_polyline.cu | 2 +- cpp/src/join/quadtree_poly_filtering.cu | 2 +- cpp/tests/join/point_in_polygon_test_large.cpp | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cpp/src/indexing/construction/detail/phase_1.cuh b/cpp/src/indexing/construction/detail/phase_1.cuh index 71413156e..fd70a1227 100644 --- a/cpp/src/indexing/construction/detail/phase_1.cuh +++ b/cpp/src/indexing/construction/detail/phase_1.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/indexing/construction/detail/phase_2.cuh b/cpp/src/indexing/construction/detail/phase_2.cuh index c18350ce6..de85db6bc 100644 --- a/cpp/src/indexing/construction/detail/phase_2.cuh +++ b/cpp/src/indexing/construction/detail/phase_2.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/indexing/construction/detail/utilities.cuh b/cpp/src/indexing/construction/detail/utilities.cuh index e1bb2151a..b37176024 100644 --- a/cpp/src/indexing/construction/detail/utilities.cuh +++ b/cpp/src/indexing/construction/detail/utilities.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/indexing/construction/point_quadtree.cu b/cpp/src/indexing/construction/point_quadtree.cu index f822578eb..f6120feb0 100644 --- a/cpp/src/indexing/construction/point_quadtree.cu +++ b/cpp/src/indexing/construction/point_quadtree.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/join/detail/intersection.cuh b/cpp/src/join/detail/intersection.cuh index 7ce6efe61..3bbbc0766 100644 --- a/cpp/src/join/detail/intersection.cuh +++ b/cpp/src/join/detail/intersection.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/join/detail/traversal.cuh b/cpp/src/join/detail/traversal.cuh index adef3309e..62b57357e 100644 --- a/cpp/src/join/detail/traversal.cuh +++ b/cpp/src/join/detail/traversal.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/join/quadtree_point_in_polygon.cu b/cpp/src/join/quadtree_point_in_polygon.cu index 601f2d6f7..edda6fe45 100644 --- a/cpp/src/join/quadtree_point_in_polygon.cu +++ b/cpp/src/join/quadtree_point_in_polygon.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/join/quadtree_point_to_nearest_polyline.cu b/cpp/src/join/quadtree_point_to_nearest_polyline.cu index 18b45464c..fd86fe791 100644 --- a/cpp/src/join/quadtree_point_to_nearest_polyline.cu +++ b/cpp/src/join/quadtree_point_to_nearest_polyline.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/join/quadtree_poly_filtering.cu b/cpp/src/join/quadtree_poly_filtering.cu index 2e3c261cf..c77467fff 100644 --- a/cpp/src/join/quadtree_poly_filtering.cu +++ b/cpp/src/join/quadtree_poly_filtering.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/tests/join/point_in_polygon_test_large.cpp b/cpp/tests/join/point_in_polygon_test_large.cpp index 8f9a91f9f..edc84943f 100644 --- a/cpp/tests/join/point_in_polygon_test_large.cpp +++ b/cpp/tests/join/point_in_polygon_test_large.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 782af8af9d3d35768115bd56b36e494c26269f6d Mon Sep 17 00:00:00 2001 From: ptaylor Date: Sun, 11 Apr 2021 21:22:25 -0500 Subject: [PATCH 10/22] scatter reduced point/poly/distances into place --- .../quadtree_point_to_nearest_polyline.cu | 312 ++++++++++-------- 1 file changed, 171 insertions(+), 141 deletions(-) diff --git a/cpp/src/join/quadtree_point_to_nearest_polyline.cu b/cpp/src/join/quadtree_point_to_nearest_polyline.cu index 314e2b5ef..b53c04a3d 100644 --- a/cpp/src/join/quadtree_point_to_nearest_polyline.cu +++ b/cpp/src/join/quadtree_point_to_nearest_polyline.cu @@ -158,158 +158,188 @@ struct compute_quadtree_point_to_nearest_polyline { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { - auto quad_lengths = quadtree.column(3); - auto quad_offsets = quadtree.column(4); - auto num_poly_quad_pairs = poly_quad_pairs.num_rows(); - - rmm::device_uvector quad_poly_indices(num_poly_quad_pairs, stream); - rmm::device_uvector quad_point_offsets(num_poly_quad_pairs, stream); - rmm::device_uvector quad_point_lengths(num_poly_quad_pairs, stream); - - auto quad_poly_indices_and_quad_offsets_lengths = thrust::make_zip_iterator( - // poly_indices - poly_quad_pairs.column(0).begin(), - // quad_offsets - thrust::make_permutation_iterator(quadtree.column(4).begin(), - poly_quad_pairs.column(1).begin()), - // quad_lengths - thrust::make_permutation_iterator(quadtree.column(3).begin(), - poly_quad_pairs.column(1).begin())); - - // Copy the quad/polyline indices into local vectors instead of sorting the input columns - thrust::copy( - rmm::exec_policy(stream), - quad_poly_indices_and_quad_offsets_lengths, - quad_poly_indices_and_quad_offsets_lengths + num_poly_quad_pairs, - thrust::make_zip_iterator( - quad_poly_indices.begin(), quad_point_offsets.begin(), quad_point_lengths.begin())); - - // Sort the quad/polyline index pairs by quadrant so that the `point_indices` are - // produced/reduced in ascending order below. - thrust::stable_sort_by_key( - rmm::exec_policy(stream), - quad_point_offsets.begin(), - quad_point_offsets.end(), - thrust::make_zip_iterator(quad_point_lengths.begin(), quad_poly_indices.begin())); - - // Compute a "local" set of zero-based point offsets from number of points in each quadrant - // Use `num_poly_quad_pairs + 1` as the length so that the last element produced by - // `inclusive_scan` is the total number of points to be tested against any polyline. - rmm::device_uvector local_point_offsets(num_poly_quad_pairs + 1, stream); - - // Ensure local point offsets starts at 0 - uint32_t init{0}; - local_point_offsets.set_element_async(0, init, stream); - - thrust::inclusive_scan(rmm::exec_policy(stream), - quad_point_lengths.begin(), - quad_point_lengths.end(), - local_point_offsets.begin() + 1); - - // The last element is the total number of points to test against any polyline. - auto num_point_poly_pairs = local_point_offsets.back_element(stream); - - // Enumerate the point X/Ys using the sorted `point_indices` (from quadtree construction) - auto point_xys_iter = thrust::make_permutation_iterator( - thrust::make_zip_iterator(point_x.begin(), point_y.begin()), - point_indices.begin()); - - // - // Compute the combination of point and polyline index pairs. For each polyline/quadrant pair, - // enumerate pairs of (point_index, polyline_index) for each point in each quadrant, and - // calculate the minimum distance between each point/poly pair. - // - // In Python pseudocode: - // ``` - // pp_pairs_and_dist = [] - // for polyline, quadrant in pq_pairs: - // for point in quadrant: - // pp_pairs_and_dist.append((point, polyline, min_distance(point, polyline))) - // ``` - // - // However, the above psuedocode produces values in an order such that the distance - // from a point to each polyline cannot be reduced with `thrust::reduce_by_key`: - // ``` - // point | polyline | distance - // 0 | 0 | 10.0 - // 1 | 0 | 30.0 - // 2 | 0 | 20.0 - // 0 | 1 | 30.0 - // 1 | 1 | 20.0 - // 2 | 1 | 10.0 - // ``` - // - // In order to use `thrust::reduce_by_key` to compute the minimum distance from a point to - // the polylines in its quadrant, the above table needs to be sorted by `point` instead of - // `polyline`: - // ``` - // point | polyline | distance - // 0 | 0 | 10.0 - // 0 | 1 | 30.0 - // 1 | 0 | 30.0 - // 1 | 1 | 20.0 - // 2 | 0 | 20.0 - // 2 | 1 | 10.0 - // ``` - // - // A naive approach would be to allocate memory for the above three columns, sort the - // columns by `point`, then use `thrust::reduce_by_key` to compute the min distances. - // - // The sizes of the intermediate buffers required can easily grow beyond available - // device memory, so a better approach is to use a Thrust iterator to yield values - // in the sorted order on demand instead, which is what we're doing here. - // - - auto all_point_poly_indices_and_distances = thrust::make_transform_iterator( - thrust::make_counting_iterator(0u), - compute_point_poly_indices_and_distances{ - point_xys_iter, - local_point_offsets.begin(), - local_point_offsets.end(), + // Wrapped in an IIFE so intermediate allocations are freed on return + auto reduction = [&]() { + auto quad_lengths = quadtree.column(3); + auto quad_offsets = quadtree.column(4); + auto num_poly_quad_pairs = poly_quad_pairs.num_rows(); + + rmm::device_uvector quad_poly_indices(num_poly_quad_pairs, stream); + rmm::device_uvector quad_point_offsets(num_poly_quad_pairs, stream); + rmm::device_uvector quad_point_lengths(num_poly_quad_pairs, stream); + + auto quad_poly_indices_and_quad_offsets_lengths = thrust::make_zip_iterator( + // poly_indices + poly_quad_pairs.column(0).begin(), + // quad_offsets + thrust::make_permutation_iterator(quadtree.column(4).begin(), + poly_quad_pairs.column(1).begin()), + // quad_lengths + thrust::make_permutation_iterator(quadtree.column(3).begin(), + poly_quad_pairs.column(1).begin())); + + // Copy the quad/polyline indices into local vectors instead of sorting the input columns + thrust::copy( + rmm::exec_policy(stream), + quad_poly_indices_and_quad_offsets_lengths, + quad_poly_indices_and_quad_offsets_lengths + num_poly_quad_pairs, + thrust::make_zip_iterator( + quad_poly_indices.begin(), quad_point_offsets.begin(), quad_point_lengths.begin())); + + // Sort the quad/polyline index pairs by quadrant so that the `point_indices` are + // produced/reduced in ascending order below. + thrust::stable_sort_by_key( + rmm::exec_policy(stream), quad_point_offsets.begin(), quad_point_offsets.end(), - quad_point_lengths.begin(), - quad_poly_indices.begin(), - *cudf::column_device_view::create(poly_offsets, stream), - *cudf::column_device_view::create(poly_points_x, stream), - *cudf::column_device_view::create(poly_points_y, stream)}); - - auto all_point_indices = - thrust::make_transform_iterator(all_point_poly_indices_and_distances, - [] __device__(auto const &x) { return thrust::get<0>(x); }); + thrust::make_zip_iterator(quad_point_lengths.begin(), quad_poly_indices.begin())); + + // Compute a "local" set of zero-based point offsets from number of points in each quadrant + // Use `num_poly_quad_pairs + 1` as the length so that the last element produced by + // `inclusive_scan` is the total number of points to be tested against any polyline. + rmm::device_uvector local_point_offsets(num_poly_quad_pairs + 1, stream); + + thrust::inclusive_scan(rmm::exec_policy(stream), + quad_point_lengths.begin(), + quad_point_lengths.end(), + local_point_offsets.begin() + 1); + + // Ensure local point offsets starts at 0 + uint32_t init{0}; + local_point_offsets.set_element_async(0, init, stream); + + // The last element is the total number of points to test against any polyline. + auto num_point_poly_pairs = local_point_offsets.back_element(stream); + + // Enumerate the point X/Ys using the sorted `point_indices` (from quadtree construction) + auto point_xys_iter = thrust::make_permutation_iterator( + thrust::make_zip_iterator(point_x.begin(), point_y.begin()), + point_indices.begin()); + + // + // Compute the combination of point and polyline index pairs. For each polyline/quadrant pair, + // enumerate pairs of (point_index, polyline_index) for each point in each quadrant, and + // calculate the minimum distance between each point/poly pair. + // + // In Python pseudocode: + // ``` + // pp_pairs_and_dist = [] + // for polyline, quadrant in pq_pairs: + // for point in quadrant: + // pp_pairs_and_dist.append((point, polyline, min_distance(point, polyline))) + // ``` + // + // However, the above psuedocode produces values in an order such that the distance + // from a point to each polyline cannot be reduced with `thrust::reduce_by_key`: + // ``` + // point | polyline | distance + // 0 | 0 | 10.0 + // 1 | 0 | 30.0 + // 2 | 0 | 20.0 + // 0 | 1 | 30.0 + // 1 | 1 | 20.0 + // 2 | 1 | 10.0 + // ``` + // + // In order to use `thrust::reduce_by_key` to compute the minimum distance from a point to + // the polylines in its quadrant, the above table needs to be sorted by `point` instead of + // `polyline`: + // ``` + // point | polyline | distance + // 0 | 0 | 10.0 + // 0 | 1 | 30.0 + // 1 | 0 | 30.0 + // 1 | 1 | 20.0 + // 2 | 0 | 20.0 + // 2 | 1 | 10.0 + // ``` + // + // A naive approach would be to allocate memory for the above three columns, sort the + // columns by `point`, then use `thrust::reduce_by_key` to compute the min distances. + // + // The sizes of the intermediate buffers required can easily grow beyond available + // device memory, so a better approach is to use a Thrust iterator to yield values + // in the sorted order on demand instead, which is what we're doing here. + // + + auto all_point_poly_indices_and_distances = thrust::make_transform_iterator( + thrust::make_counting_iterator(0u), + compute_point_poly_indices_and_distances{ + point_xys_iter, + local_point_offsets.begin(), + local_point_offsets.end(), + quad_point_offsets.begin(), + quad_point_offsets.end(), + quad_point_lengths.begin(), + quad_poly_indices.begin(), + *cudf::column_device_view::create(poly_offsets, stream), + *cudf::column_device_view::create(poly_points_x, stream), + *cudf::column_device_view::create(poly_points_y, stream)}); + + auto all_point_indices = + thrust::make_transform_iterator(all_point_poly_indices_and_distances, + [] __device__(auto const &x) { return thrust::get<0>(x); }); + + // Allocate vectors for the distances min reduction + rmm::device_uvector point_idxs(point_x.size(), stream); + rmm::device_uvector poly_idxs(point_x.size(), stream); + rmm::device_uvector distances(point_x.size(), stream); + + // Reduce the intermediate point/polyline indices to lists of point/polyline index pairs and + // distances, selecting the polyline index closest to each point. + auto const num_distances = thrust::distance( + point_idxs.begin(), + thrust::reduce_by_key( + rmm::exec_policy(stream), + // point indices in + all_point_indices, + all_point_indices + num_point_poly_pairs, + all_point_poly_indices_and_distances, + // point indices out + point_idxs.begin(), + // point/polyline indices and distances out + thrust::make_zip_iterator( + thrust::make_discard_iterator(), poly_idxs.begin(), distances.begin()), + // comparator + thrust::equal_to(), + // binop to select the point/polyline pair with the smallest distance + [] __device__(auto const &lhs, auto const &rhs) { + T const &d_lhs = thrust::get<2>(lhs); + T const &d_rhs = thrust::get<2>(rhs); + return (d_lhs == T{0}) ? rhs : (d_rhs == T{0}) ? lhs : d_lhs < d_rhs ? lhs : rhs; + }) + .first); + + point_idxs.resize(num_distances, stream); + point_idxs.shrink_to_fit(stream); + + poly_idxs.resize(num_distances, stream); + poly_idxs.shrink_to_fit(stream); + + distances.resize(num_distances, stream); + distances.shrink_to_fit(stream); + + return std::make_tuple(std::move(point_idxs), std::move(poly_idxs), std::move(distances)); + }(); + + auto const &point_idxs = std::get<0>(reduction); + auto const &poly_idxs = std::get<1>(reduction); + auto const &distances = std::get<2>(reduction); // Allocate output columns for the point and polyline index pairs and their distances auto point_index_col = make_fixed_width_column(point_x.size(), stream, mr); auto poly_index_col = make_fixed_width_column(point_x.size(), stream, mr); auto distance_col = make_fixed_width_column(point_x.size(), stream, mr); - // Fill output distance column with T_MAX because `reduce_by_key` selector is associative - thrust::fill(rmm::exec_policy(stream), - distance_col->mutable_view().template begin(), - distance_col->mutable_view().template end(), - std::numeric_limits::max()); - - // Reduce the intermediate point/poly indices to lists of point/polyline - // index pairs and distances, selecting the polyline index closest to each point. - thrust::reduce_by_key( + // scatter the values from their positions after reduction into their output positions + thrust::scatter( rmm::exec_policy(stream), - // point indices in - all_point_indices, - all_point_indices + num_point_poly_pairs, - all_point_poly_indices_and_distances, - // point indices out - thrust::make_discard_iterator(), - // point_index_col->mutable_view().begin(), - // point/polyline indices and distances out + thrust::make_zip_iterator(point_idxs.begin(), poly_idxs.begin(), distances.begin()), + thrust::make_zip_iterator(point_idxs.end(), poly_idxs.end(), distances.end()), + point_idxs.begin(), thrust::make_zip_iterator(point_index_col->mutable_view().begin(), poly_index_col->mutable_view().begin(), - distance_col->mutable_view().template begin()), - // comparator - thrust::equal_to(), - // binop to select the point/polyline pair with the smallest distance - [] __device__(auto const &a, auto const &b) { - return thrust::get<2>(a) < thrust::get<2>(b) ? a : b; - }); + distance_col->mutable_view().template begin())); std::vector> cols{}; cols.reserve(3); From 2cdc33a0c68f6e0506840bf16f4b324a1ccbaea0 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Mon, 12 Apr 2021 07:20:04 -0500 Subject: [PATCH 11/22] deterministically handle case where distances between a point and multiple lines are the same --- .../quadtree_point_to_nearest_polyline.cu | 60 ++++++++++++------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/cpp/src/join/quadtree_point_to_nearest_polyline.cu b/cpp/src/join/quadtree_point_to_nearest_polyline.cu index b53c04a3d..7d30a55da 100644 --- a/cpp/src/join/quadtree_point_to_nearest_polyline.cu +++ b/cpp/src/join/quadtree_point_to_nearest_polyline.cu @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -36,6 +37,7 @@ #include #include +#include "thrust/fill.h" namespace cuspatial { namespace detail { @@ -285,30 +287,44 @@ struct compute_quadtree_point_to_nearest_polyline { rmm::device_uvector poly_idxs(point_x.size(), stream); rmm::device_uvector distances(point_x.size(), stream); + // Fill distances with 0 + thrust::fill(rmm::exec_policy(stream), distances.begin(), distances.end(), T{0}); + // Reduce the intermediate point/polyline indices to lists of point/polyline index pairs and // distances, selecting the polyline index closest to each point. - auto const num_distances = thrust::distance( - point_idxs.begin(), - thrust::reduce_by_key( - rmm::exec_policy(stream), - // point indices in - all_point_indices, - all_point_indices + num_point_poly_pairs, - all_point_poly_indices_and_distances, - // point indices out - point_idxs.begin(), - // point/polyline indices and distances out - thrust::make_zip_iterator( - thrust::make_discard_iterator(), poly_idxs.begin(), distances.begin()), - // comparator - thrust::equal_to(), - // binop to select the point/polyline pair with the smallest distance - [] __device__(auto const &lhs, auto const &rhs) { - T const &d_lhs = thrust::get<2>(lhs); - T const &d_rhs = thrust::get<2>(rhs); - return (d_lhs == T{0}) ? rhs : (d_rhs == T{0}) ? lhs : d_lhs < d_rhs ? lhs : rhs; - }) - .first); + auto const num_distances = + thrust::distance(point_idxs.begin(), + thrust::reduce_by_key( + rmm::exec_policy(stream), + // point indices in + all_point_indices, + all_point_indices + num_point_poly_pairs, + all_point_poly_indices_and_distances, + // point indices out + point_idxs.begin(), + // point/polyline indices and distances out + thrust::make_zip_iterator( + thrust::make_discard_iterator(), poly_idxs.begin(), distances.begin()), + // comparator + thrust::equal_to(), + // binop to select the point/polyline pair with the smallest distance + [] __device__(auto const &lhs, auto const &rhs) { + T const &d_lhs = thrust::get<2>(lhs); + T const &d_rhs = thrust::get<2>(rhs); + // If lhs distance is 0, choose rhs + if (d_lhs == T{0}) { return rhs; } + // if rhs distance is 0, choose lhs + if (d_rhs == T{0}) { return lhs; } + // If distances to lhs/rhs are the same, choose poly with smallest id + if (d_lhs == d_rhs) { + auto const &i_lhs = thrust::get<1>(lhs); + auto const &i_rhs = thrust::get<1>(rhs); + return i_lhs < i_rhs ? lhs : rhs; + } + // Otherwise choose poly with smallest distance + return d_lhs < d_rhs ? lhs : rhs; + }) + .first); point_idxs.resize(num_distances, stream); point_idxs.shrink_to_fit(stream); From 26253fac7127e4af7de94d298ba8cc18e62bd74f Mon Sep 17 00:00:00 2001 From: ptaylor Date: Tue, 13 Apr 2021 10:59:06 -0500 Subject: [PATCH 12/22] use C++17 and cuDF 0.20.* --- conda/environments/cuspatial_dev_cuda11.0.yml | 4 ++-- ...ial_dev_cuda10.1.yml => cuspatial_dev_cuda11.1.yml} | 4 ++-- ...ial_dev_cuda10.2.yml => cuspatial_dev_cuda11.2.yml} | 4 ++-- cpp/CMakeLists.txt | 4 ++-- cpp/benchmarks/CMakeLists.txt | 2 ++ cpp/cmake/Modules/SetGPUArchs.cmake | 10 ---------- java/src/main/native/CMakeLists.txt | 6 ++---- 7 files changed, 12 insertions(+), 22 deletions(-) rename conda/environments/{cuspatial_dev_cuda10.1.yml => cuspatial_dev_cuda11.1.yml} (87%) rename conda/environments/{cuspatial_dev_cuda10.2.yml => cuspatial_dev_cuda11.2.yml} (87%) diff --git a/conda/environments/cuspatial_dev_cuda11.0.yml b/conda/environments/cuspatial_dev_cuda11.0.yml index e1ff6c339..797ad5059 100644 --- a/conda/environments/cuspatial_dev_cuda11.0.yml +++ b/conda/environments/cuspatial_dev_cuda11.0.yml @@ -8,11 +8,11 @@ channels: dependencies: - clang=8.0.1 - clang-tools=8.0.1 - - cudf=0.16.* + - cudf=0.20.* - cudatoolkit=11.0 - gdal>=3.0.2 - geopandas=0.7.0 - - cmake>=3.14 + - cmake>=3.18 - cython>=0.29,<0.30 - gtest - gmock diff --git a/conda/environments/cuspatial_dev_cuda10.1.yml b/conda/environments/cuspatial_dev_cuda11.1.yml similarity index 87% rename from conda/environments/cuspatial_dev_cuda10.1.yml rename to conda/environments/cuspatial_dev_cuda11.1.yml index b3bb5c5b5..db43bad02 100644 --- a/conda/environments/cuspatial_dev_cuda10.1.yml +++ b/conda/environments/cuspatial_dev_cuda11.1.yml @@ -8,8 +8,8 @@ channels: dependencies: - clang=8.0.1 - clang-tools=8.0.1 - - cudf=0.16.* - - cudatoolkit=10.1 + - cudf=0.20.* + - cudatoolkit=11.1 - gdal>=3.0.2 - geopandas=0.7.0 - cmake>=3.14 diff --git a/conda/environments/cuspatial_dev_cuda10.2.yml b/conda/environments/cuspatial_dev_cuda11.2.yml similarity index 87% rename from conda/environments/cuspatial_dev_cuda10.2.yml rename to conda/environments/cuspatial_dev_cuda11.2.yml index 8d8d32946..da444b761 100644 --- a/conda/environments/cuspatial_dev_cuda10.2.yml +++ b/conda/environments/cuspatial_dev_cuda11.2.yml @@ -8,8 +8,8 @@ channels: dependencies: - clang=8.0.1 - clang-tools=8.0.1 - - cudf=0.16.* - - cudatoolkit=10.2 + - cudf=0.20.* + - cudatoolkit=11.2 - gdal>=3.0.2 - geopandas=0.7.0 - cmake>=3.14 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 17ad25434..9530dc472 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -141,9 +141,9 @@ set_target_properties(cuspatial PROPERTIES BUILD_RPATH "\$ORIGIN" INSTALL_RPATH "\$ORIGIN" # set target compile options - CXX_STANDARD 14 + CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 14 + CUDA_STANDARD 17 CUDA_STANDARD_REQUIRED ON POSITION_INDEPENDENT_CODE ON INTERFACE_POSITION_INDEPENDENT_CODE ON diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index dbb044476..93dd41795 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -21,6 +21,8 @@ add_library(cuspatial_benchmark_common OBJECT synchronization/synchronization.cpp) +target_compile_features(cuspatial_benchmark_common PUBLIC cxx_std_17 cuda_std_17) + target_link_libraries(cuspatial_benchmark_common PUBLIC benchmark::benchmark cudf::cudftestutil diff --git a/cpp/cmake/Modules/SetGPUArchs.cmake b/cpp/cmake/Modules/SetGPUArchs.cmake index f09d5ead8..8ab3c14d6 100644 --- a/cpp/cmake/Modules/SetGPUArchs.cmake +++ b/cpp/cmake/Modules/SetGPUArchs.cmake @@ -38,16 +38,6 @@ if(NOT DEFINED CUDAToolkit_VERSION AND CMAKE_CUDA_COMPILER) unset(NVCC_OUT) endif() -if(CUDAToolkit_VERSION_MAJOR LESS 11) - list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "80") -endif() -if(CUDAToolkit_VERSION_MAJOR LESS 10) - list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "75") -endif() -if(CUDAToolkit_VERSION_MAJOR LESS 9) - list(REMOVE_ITEM SUPPORTED_CUDA_ARCHITECTURES "70") -endif() - if(${PROJECT_NAME}_BUILD_FOR_ALL_ARCHS) set(CMAKE_CUDA_ARCHITECTURES ${SUPPORTED_CUDA_ARCHITECTURES}) diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt index 79efee230..de9676445 100755 --- a/java/src/main/native/CMakeLists.txt +++ b/java/src/main/native/CMakeLists.txt @@ -35,12 +35,12 @@ endif() ################################################################################################### # - compiler options ------------------------------------------------------------------------------ -set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD 17) set(CMAKE_C_COMPILER $ENV{CC}) set(CMAKE_CXX_COMPILER $ENV{CXX}) set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CUDA_STANDARD 14) +set(CMAKE_CUDA_STANDARD 17) set(CMAKE_CUDA_STANDARD_REQUIRED ON) if(CMAKE_COMPILER_IS_GNUCXX) @@ -225,5 +225,3 @@ endif(PER_THREAD_DEFAULT_STREAM) # - link libraries -------------------------------------------------------------------------------- target_link_libraries(cuspatialjni cuspatial) - - From e92a56a0d3b72e1d9a70c67141af843ebd4ee6ec Mon Sep 17 00:00:00 2001 From: ptaylor Date: Tue, 13 Apr 2021 10:59:25 -0500 Subject: [PATCH 13/22] update README.md with latest features + markdown lint fixes --- README.md | 60 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 10171f6ce..80338852b 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,10 @@ **NOTE:** cuSpatial depends on [cuDF](https://github.com/rapidsai/cudf) and [RMM](https://github.com/rapidsai/rmm) from [RAPIDS](https://rapids.ai/). -## Implemented operations: +## Operations + cuSpatial supports the following operations on spatial and trajectory data: + 1. Spatial window query 2. Point-in-polygon test 3. Haversine distance @@ -14,22 +16,27 @@ cuSpatial supports the following operations on spatial and trajectory data: 5. Deriving trajectories from point location data 6. Computing distance/speed of trajectories 7. Computing spatial bounding boxes of trajectories +8. Quadtree-based indexing for large-scale point data +9. Quadtree-based point-in-polygon spatial join +10. Quadtree-based point-to-polyline nearest neighbor distance + +Future support is planned for the following operations: -Future support is planned for the following operations. 1. Temporal window query 2. Temporal point query (year+month+day+hour+minute+second+millisecond) -3. Point-to-polyline nearest neighbor distance -4. Grid-based indexing for points and polygons -5. Quadtree-based indexing for large-scale point data -6. R-Tree-based indexing for Polygons/Polylines +3. Grid-based indexing for points and polygons +4. R-Tree-based indexing for Polygons/Polylines ## Install from Conda + To install via conda: -``` + +```shell conda install -c conda-forge -c rapidsai-nightly cuspatial ``` ## Install from Source + To build and install cuSpatial from source: ### Install dependencies @@ -46,28 +53,31 @@ environment created in step 3 is active. 1. export `CUSPATIAL_HOME=$(pwd)/cuspatial` 2. clone the cuSpatial repo -``` -git clone --recurse-submodules https://github.com/rapidsai/cuspatial.git $CUSPATIAL_HOME -``` + ```shell + git clone --recurse-submodules https://github.com/rapidsai/cuspatial.git $CUSPATIAL_HOME + ``` -3. Compile and install -Similar to cuDF (version 0.11), simplely run 'build.sh' diectly under $CUSPATIAL_HOME
-Note that a "build" dir is created automatically under $CUSPATIAL_HOME/cpp +3. Compile and install -4. Run C++/Python test code
+ Similar to cuDF (version 0.20), simply run `build.sh` diectly under `$CUSPATIAL_HOME`. -Some tests using inline data can be run directly, e.g., -``` -$CUSPATIAL_HOME/cpp/build/gtests/LEGACY_HAUSDORFF_TEST -$CUSPATIAL_HOME/cpp/build/gtests/POINT_IN_POLYGON_TEST -python python/cuspatial/cuspatial/tests/legacy/test_hausdorff_distance.py -python python/cuspatial/cuspatial/tests/test_pip.py -``` + Note that a "build" dir is created automatically under `$CUSPATIAL_HOME/cpp`. + +4. Run C++/Python test code + + Some tests using inline data can be run directly, e.g.: + + ```shell + $CUSPATIAL_HOME/cpp/build/gtests/LEGACY_HAUSDORFF_TEST + $CUSPATIAL_HOME/cpp/build/gtests/POINT_IN_POLYGON_TEST + python python/cuspatial/cuspatial/tests/legacy/test_hausdorff_distance.py + python python/cuspatial/cuspatial/tests/test_pip.py + ``` -Some other tests involve I/O from data files under $CUSPATIAL_HOME/test_fixtures. -For example, $CUSPATIAL_HOME/cpp/build/gtests/SHAPEFILE_READER_TEST requires three -pre-generated polygon shapefiles that contain 0, 1 and 2 polygons, respectively. They are available at -$CUSPATIAL_HOME/test_fixtures/shapefiles
+ Some other tests involve I/O from data files under $CUSPATIAL_HOME/test_fixtures. + For example, $CUSPATIAL_HOME/cpp/build/gtests/SHAPEFILE_READER_TEST requires three + pre-generated polygon shapefiles that contain 0, 1 and 2 polygons, respectively. They are available at + $CUSPATIAL_HOME/test_fixtures/shapefiles
**NOTE:** Currently, cuSpatial supports reading point/polyine/polygon data using Structure of Array (SoA) format and a [shapefile reader](./cpp/src/io/shp) From 5e8661a3508ee112ff89b75f0abe067259d36e99 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Tue, 13 Apr 2021 15:00:55 -0500 Subject: [PATCH 14/22] sort the results of quad bbox join by quad offset --- .../quadtree_point_to_nearest_polyline.cu | 76 +++++++------------ cpp/src/join/quadtree_poly_filtering.cu | 17 +++++ .../join/point_in_polygon_test_small.cpp | 4 +- .../join/quadtree_polygon_filtering_test.cu | 4 +- .../join/quadtree_polyline_filtering_test.cu | 9 +-- 5 files changed, 52 insertions(+), 58 deletions(-) diff --git a/cpp/src/join/quadtree_point_to_nearest_polyline.cu b/cpp/src/join/quadtree_point_to_nearest_polyline.cu index 86d3e7219..78432b02e 100644 --- a/cpp/src/join/quadtree_point_to_nearest_polyline.cu +++ b/cpp/src/join/quadtree_point_to_nearest_polyline.cu @@ -15,7 +15,6 @@ */ #include -#include #include #include @@ -32,13 +31,14 @@ #include #include +#include #include #include #include #include +#include #include -#include "thrust/fill.h" namespace cuspatial { namespace detail { @@ -62,8 +62,9 @@ inline __device__ thrust::tuple get_quad_poly_and_local_poin global_index - *local_point_offset); } +template inline __device__ thrust::tuple get_local_poly_index_and_count( - uint32_t const poly_index, uint32_t const *quad_offsets, uint32_t const *quad_offsets_end) + uint32_t const poly_index, QuadOffsetsIter quad_offsets, QuadOffsetsIter quad_offsets_end) { auto const lhs_end = quad_offsets; auto const rhs_end = quad_offsets_end; @@ -80,13 +81,14 @@ inline __device__ thrust::tuple get_local_poly_index_and_cou static_cast(thrust::distance(lhs, rhs))); } +template inline __device__ thrust::tuple get_transposed_point_and_pair_index( uint32_t const global_index, uint32_t const *point_offsets, uint32_t const *point_offsets_end, - uint32_t const *quad_offsets, - uint32_t const *quad_offsets_end, - uint32_t const *quad_lengths) + QuadOffsetsIter quad_offsets, + QuadOffsetsIter quad_offsets_end, + QuadLengthsIter quad_lengths) { uint32_t quad_poly_index{0}, local_point_offset{0}; thrust::tie(quad_poly_index, local_point_offset) = @@ -109,14 +111,14 @@ inline __device__ thrust::tuple get_transposed_point_and_pai (transposed_point_start % num_polys_in_quad) + quad_poly_offset); } -template +template struct compute_point_poly_indices_and_distances { PointIter points; uint32_t const *point_offsets; uint32_t const *point_offsets_end; - uint32_t const *quad_offsets; - uint32_t const *quad_offsets_end; - uint32_t const *quad_lengths; + QuadOffsetsIter quad_offsets; + QuadOffsetsIter quad_offsets_end; + QuadLengthsIter quad_lengths; uint32_t const *poly_indices; cudf::column_device_view const poly_offsets; cudf::column_device_view const poly_points_x; @@ -163,39 +165,12 @@ struct compute_quadtree_point_to_nearest_polyline { { // Wrapped in an IIFE so intermediate allocations are freed on return auto reduction = [&]() { - auto quad_lengths = quadtree.column(3); - auto quad_offsets = quadtree.column(4); auto num_poly_quad_pairs = poly_quad_pairs.num_rows(); - - rmm::device_uvector quad_poly_indices(num_poly_quad_pairs, stream); - rmm::device_uvector quad_point_offsets(num_poly_quad_pairs, stream); - rmm::device_uvector quad_point_lengths(num_poly_quad_pairs, stream); - - auto quad_poly_indices_and_quad_offsets_lengths = thrust::make_zip_iterator( - // poly_indices - poly_quad_pairs.column(0).begin(), - // quad_offsets - thrust::make_permutation_iterator(quadtree.column(4).begin(), - poly_quad_pairs.column(1).begin()), - // quad_lengths - thrust::make_permutation_iterator(quadtree.column(3).begin(), - poly_quad_pairs.column(1).begin())); - - // Copy the quad/polyline indices into local vectors instead of sorting the input columns - thrust::copy( - rmm::exec_policy(stream), - quad_poly_indices_and_quad_offsets_lengths, - quad_poly_indices_and_quad_offsets_lengths + num_poly_quad_pairs, - thrust::make_zip_iterator( - quad_poly_indices.begin(), quad_point_offsets.begin(), quad_point_lengths.begin())); - - // Sort the quad/polyline index pairs by quadrant so that the `point_indices` are - // produced/reduced in ascending order below. - thrust::stable_sort_by_key( - rmm::exec_policy(stream), - quad_point_offsets.begin(), - quad_point_offsets.end(), - thrust::make_zip_iterator(quad_point_lengths.begin(), quad_poly_indices.begin())); + auto poly_indices = poly_quad_pairs.column(0).begin(); + auto quad_lengths = thrust::make_permutation_iterator( + quadtree.column(3).begin(), poly_quad_pairs.column(1).begin()); + auto quad_offsets = thrust::make_permutation_iterator( + quadtree.column(4).begin(), poly_quad_pairs.column(1).begin()); // Compute a "local" set of zero-based point offsets from number of points in each quadrant // Use `num_poly_quad_pairs + 1` as the length so that the last element produced by @@ -203,8 +178,8 @@ struct compute_quadtree_point_to_nearest_polyline { rmm::device_uvector local_point_offsets(num_poly_quad_pairs + 1, stream); thrust::inclusive_scan(rmm::exec_policy(stream), - quad_point_lengths.begin(), - quad_point_lengths.end(), + quad_lengths, + quad_lengths + num_poly_quad_pairs, local_point_offsets.begin() + 1); // Ensure local point offsets starts at 0 @@ -267,14 +242,17 @@ struct compute_quadtree_point_to_nearest_polyline { auto all_point_poly_indices_and_distances = thrust::make_transform_iterator( thrust::make_counting_iterator(0u), - compute_point_poly_indices_and_distances{ + compute_point_poly_indices_and_distances{ point_xys_iter, local_point_offsets.begin(), local_point_offsets.end(), - quad_point_offsets.begin(), - quad_point_offsets.end(), - quad_point_lengths.begin(), - quad_poly_indices.begin(), + quad_offsets, + quad_offsets + num_poly_quad_pairs, + quad_lengths, + poly_indices, *cudf::column_device_view::create(poly_offsets, stream), *cudf::column_device_view::create(poly_points_x, stream), *cudf::column_device_view::create(poly_points_y, stream)}); diff --git a/cpp/src/join/quadtree_poly_filtering.cu b/cpp/src/join/quadtree_poly_filtering.cu index c77467fff..c3d5746ed 100644 --- a/cpp/src/join/quadtree_poly_filtering.cu +++ b/cpp/src/join/quadtree_poly_filtering.cu @@ -176,6 +176,23 @@ inline std::unique_ptr join_quadtree_and_bboxes(cudf::table_view co num_results += num_leaves; } + // Sort the output poly/quad indices by quadrant + [&]() { + // Copy the relevant `node_offsets` into a tmp vec so we don't modify the quadtree column + rmm::device_uvector tmp_node_offsets(num_results, stream); + + auto const iter = + thrust::make_permutation_iterator(node_offsets.begin(), out_node_idxs.begin()); + + thrust::copy(rmm::exec_policy(stream), iter, iter + num_results, tmp_node_offsets.begin()); + + thrust::stable_sort_by_key( + rmm::exec_policy(stream), + tmp_node_offsets.begin(), + tmp_node_offsets.end(), + thrust::make_zip_iterator(out_poly_idxs.begin(), out_node_idxs.begin())); + }(); + std::vector> cols{}; cols.reserve(2); cols.push_back(make_fixed_width_column(num_results, stream, mr)); diff --git a/cpp/tests/join/point_in_polygon_test_small.cpp b/cpp/tests/join/point_in_polygon_test_small.cpp index 4ae68dc7e..2f6543666 100644 --- a/cpp/tests/join/point_in_polygon_test_small.cpp +++ b/cpp/tests/join/point_in_polygon_test_small.cpp @@ -172,8 +172,8 @@ TYPED_TEST(PIPRefineTestSmall, TestSmall) expect_tables_equivalent( cudf::table_view{ {fixed_width_column_wrapper( - {0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3}), + {3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3}), fixed_width_column_wrapper( - {62, 60, 45, 46, 47, 48, 49, 50, 51, 52, 54, 28, 29, 30, 31, 32, 33, 34, 35})}}, + {28, 29, 30, 31, 32, 33, 34, 35, 45, 46, 47, 48, 49, 50, 51, 52, 54, 62, 60})}}, *point_in_polygon_pairs); } diff --git a/cpp/tests/join/quadtree_polygon_filtering_test.cu b/cpp/tests/join/quadtree_polygon_filtering_test.cu index d9236fa7a..2b6afed3a 100644 --- a/cpp/tests/join/quadtree_polygon_filtering_test.cu +++ b/cpp/tests/join/quadtree_polygon_filtering_test.cu @@ -235,7 +235,7 @@ TYPED_TEST(QuadtreePolygonFilteringTest, test_small) "a polygon-quadrant pair table must have 2 columns (polygon-index, quadrant-index)"); expect_tables_equal( - cudf::table_view{{fixed_width_column_wrapper({0, 3, 1, 2, 1, 1, 3, 3}), - fixed_width_column_wrapper({2, 2, 6, 6, 12, 13, 10, 11})}}, + cudf::table_view{{fixed_width_column_wrapper({3, 3, 1, 2, 1, 1, 0, 3}), + fixed_width_column_wrapper({10, 11, 6, 6, 12, 13, 2, 2})}}, *polygon_quadrant_pairs); } diff --git a/cpp/tests/join/quadtree_polyline_filtering_test.cu b/cpp/tests/join/quadtree_polyline_filtering_test.cu index 1e02325e5..ad3d13da6 100644 --- a/cpp/tests/join/quadtree_polyline_filtering_test.cu +++ b/cpp/tests/join/quadtree_polyline_filtering_test.cu @@ -148,10 +148,9 @@ TYPED_TEST(QuadtreePolylineBoundingBoxJoinTest, test_small) "a polyline-quadrant pair table must have 2 columns (polyline_index, quadrant_index)"); expect_tables_equal( - cudf::table_view{ - {fixed_width_column_wrapper( - {0, 1, 3, 1, 2, 3, 3, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3}), - fixed_width_column_wrapper( - {2, 2, 2, 6, 6, 3, 6, 10, 11, 8, 10, 12, 13, 8, 10, 12, 13, 8, 9, 10, 11})}}, + cudf::table_view{{fixed_width_column_wrapper( + {3, 1, 2, 3, 3, 0, 1, 2, 3, 0, 3, 1, 2, 3, 1, 2, 1, 2, 0, 1, 3}), + fixed_width_column_wrapper({3, 8, 8, 8, 9, 10, 10, 10, 10, 11, 11, + 6, 6, 6, 12, 12, 13, 13, 2, 2, 2})}}, *polyline_quadrant_pairs); } From e329dd6d4978826ea65c0c44168c6be8b5635365 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 14 Apr 2021 14:19:42 -0500 Subject: [PATCH 15/22] fix spatial join python tests for new sorting order --- .../cuspatial/tests/test_spatial_join.py | 72 +++++++++---------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/python/cuspatial/cuspatial/tests/test_spatial_join.py b/python/cuspatial/cuspatial/tests/test_spatial_join.py index 390cfb0ae..8afe49a4e 100644 --- a/python/cuspatial/cuspatial/tests/test_spatial_join.py +++ b/python/cuspatial/cuspatial/tests/test_spatial_join.py @@ -275,10 +275,10 @@ def test_polygon_join_small(dtype): cudf.DataFrame( { "poly_offset": cudf.Series( - [0, 3, 1, 2, 1, 1, 3, 3], dtype=np.uint32 + [3, 3, 1, 2, 1, 1, 0, 3], dtype=np.uint32 ), "quad_offset": cudf.Series( - [2, 2, 6, 6, 12, 13, 10, 11], dtype=np.uint32 + [10, 11, 6, 6, 12, 13, 2, 2], dtype=np.uint32 ), } ), @@ -322,53 +322,53 @@ def test_polyline_join_small(dtype): { "poly_offset": cudf.Series( [ - 0, - 1, 3, 1, 2, 3, 3, 0, - 0, - 1, - 1, 1, - 1, - 2, - 2, - 2, 2, 3, + 0, 3, + 1, + 2, 3, + 1, + 2, + 1, + 2, + 0, + 1, 3, ], dtype=np.uint32, ), "quad_offset": cudf.Series( [ - 2, - 2, - 2, - 6, - 6, 3, - 6, - 10, - 11, 8, - 10, - 12, - 13, 8, - 10, - 12, - 13, 8, 9, 10, + 10, + 10, + 10, + 11, 11, + 6, + 6, + 6, + 12, + 12, + 13, + 13, + 2, + 2, + 2, ], dtype=np.uint32, ), @@ -423,13 +423,19 @@ def test_quadtree_point_in_polygon_small(dtype): cudf.DataFrame( { "polygon_index": cudf.Series( - [0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3], + [3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3], dtype=np.uint32, ), "point_index": cudf.Series( [ - 62, - 60, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, 45, 46, 47, @@ -439,14 +445,8 @@ def test_quadtree_point_in_polygon_small(dtype): 51, 52, 54, - 28, - 29, - 30, - 31, - 32, - 33, - 34, - 35, + 62, + 60, ], dtype=np.uint32, ), From 43bbd91d6c977c9f901870015b52c5430ef7f3fe Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 5 May 2021 23:13:38 -0500 Subject: [PATCH 16/22] use C++17 structured binding --- cpp/src/join/quadtree_point_in_polygon.cu | 7 ++- .../quadtree_point_to_nearest_polyline.cu | 47 ++++++++----------- 2 files changed, 23 insertions(+), 31 deletions(-) diff --git a/cpp/src/join/quadtree_point_in_polygon.cu b/cpp/src/join/quadtree_point_in_polygon.cu index 0699fe6c6..55e3207ce 100644 --- a/cpp/src/join/quadtree_point_in_polygon.cu +++ b/cpp/src/join/quadtree_point_in_polygon.cu @@ -112,7 +112,7 @@ struct compute_quadtree_point_in_polygon { rmm::mr::device_memory_resource *mr) { // Wrapped in an IIFE so `local_point_offsets` is freed on return - auto poly_and_point_idxs = [&]() { + auto [poly_idxs, point_idxs, num_total_points] = [&]() { auto quad_lengths = quadtree.column(3); auto quad_offsets = quadtree.column(4); auto poly_indices = poly_quad_pairs.column(0); @@ -174,9 +174,8 @@ struct compute_quadtree_point_in_polygon { return std::make_tuple(std::move(poly_idxs), std::move(point_idxs), num_total_points); }(); - auto &poly_idxs = std::get<0>(poly_and_point_idxs); - auto &point_idxs = std::get<1>(poly_and_point_idxs); - auto &num_total_points = std::get<2>(poly_and_point_idxs); + // auto &[poly_idxs, point_idxs, num_total_points] = poly_and_point_idxs; + auto poly_and_point_indices = thrust::make_zip_iterator(poly_idxs.begin(), point_idxs.begin()); // Enumerate the point X/Ys using the sorted `point_indices` (from quadtree construction) diff --git a/cpp/src/join/quadtree_point_to_nearest_polyline.cu b/cpp/src/join/quadtree_point_to_nearest_polyline.cu index 78432b02e..6950f6018 100644 --- a/cpp/src/join/quadtree_point_to_nearest_polyline.cu +++ b/cpp/src/join/quadtree_point_to_nearest_polyline.cu @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -44,7 +45,7 @@ namespace cuspatial { namespace detail { namespace { -inline __device__ thrust::tuple get_quad_poly_and_local_point_indices( +inline __device__ std::pair get_quad_poly_and_local_point_indices( uint32_t const global_index, uint32_t const *point_offsets, uint32_t const *point_offsets_end) { // Calculate the position in "point_offsets" that `global_index` falls between. @@ -55,7 +56,7 @@ inline __device__ thrust::tuple get_quad_poly_and_local_poin // quadtree yields the "global" position in the `point_indices` map. auto const local_point_offset = thrust::upper_bound(thrust::seq, point_offsets, point_offsets_end, global_index) - 1; - return thrust::make_tuple( + return std::make_pair( // quad_poly_index thrust::distance(point_offsets, local_point_offset), // local_point_index @@ -63,7 +64,7 @@ inline __device__ thrust::tuple get_quad_poly_and_local_poin } template -inline __device__ thrust::tuple get_local_poly_index_and_count( +inline __device__ std::pair get_local_poly_index_and_count( uint32_t const poly_index, QuadOffsetsIter quad_offsets, QuadOffsetsIter quad_offsets_end) { auto const lhs_end = quad_offsets; @@ -74,7 +75,7 @@ inline __device__ thrust::tuple get_local_poly_index_and_cou auto const rhs = thrust::upper_bound(thrust::seq, quad_offsets + poly_index, rhs_end, quad_offset); - return thrust::make_tuple( + return std::make_pair( // local_poly_index static_cast(thrust::distance(lhs, quad_offsets + poly_index)), // num_polys_in_quad @@ -82,7 +83,7 @@ inline __device__ thrust::tuple get_local_poly_index_and_cou } template -inline __device__ thrust::tuple get_transposed_point_and_pair_index( +inline __device__ std::pair get_transposed_point_and_pair_index( uint32_t const global_index, uint32_t const *point_offsets, uint32_t const *point_offsets_end, @@ -90,12 +91,12 @@ inline __device__ thrust::tuple get_transposed_point_and_pai QuadOffsetsIter quad_offsets_end, QuadLengthsIter quad_lengths) { - uint32_t quad_poly_index{0}, local_point_offset{0}; - thrust::tie(quad_poly_index, local_point_offset) = + // uint32_t quad_poly_index, local_point_offset; + auto const [quad_poly_index, local_point_offset] = get_quad_poly_and_local_point_indices(global_index, point_offsets, point_offsets_end); - uint32_t local_poly_index{0}, num_polys_in_quad{0}; - thrust::tie(local_poly_index, num_polys_in_quad) = + // uint32_t local_poly_index, num_polys_in_quad; + auto const [local_poly_index, num_polys_in_quad] = get_local_poly_index_and_count(quad_poly_index, quad_offsets, quad_offsets_end); auto const quad_point_offset = quad_offsets[quad_poly_index]; @@ -104,7 +105,7 @@ inline __device__ thrust::tuple get_transposed_point_and_pai auto const quad_poly_point_start = local_poly_index * num_points_in_quad; auto const transposed_point_start = quad_poly_point_start + local_point_offset; - return thrust::make_tuple( + return std::make_pair( // transposed point index (transposed_point_start / num_polys_in_quad) + quad_point_offset, // transposed polyline index @@ -125,19 +126,15 @@ struct compute_point_poly_indices_and_distances { cudf::column_device_view const poly_points_y; inline __device__ thrust::tuple operator()(uint32_t const global_index) { - // (point_index, poly_index) - auto const ids = get_transposed_point_and_pair_index( + auto const [point_id, poly_id] = get_transposed_point_and_pair_index( global_index, point_offsets, point_offsets_end, quad_offsets, quad_offsets_end, quad_lengths); - auto const point_id = thrust::get<0>(ids); - auto const point_xy = points[point_id]; - auto const poly_idx = poly_indices[thrust::get<1>(ids)]; - auto const distance = point_to_poly_line_distance(thrust::get<0>(point_xy), // x - thrust::get<1>(point_xy), // y - poly_idx, - poly_offsets, - poly_points_x, - poly_points_y); + T x{}, y{}; + thrust::tie(x, y) = points[point_id]; + auto const poly_idx = poly_indices[poly_id]; + auto const distance = + point_to_poly_line_distance(x, y, poly_idx, poly_offsets, poly_points_x, poly_points_y); + return thrust::make_tuple(point_id, poly_idx, distance); } }; @@ -164,7 +161,7 @@ struct compute_quadtree_point_to_nearest_polyline { rmm::mr::device_memory_resource *mr) { // Wrapped in an IIFE so intermediate allocations are freed on return - auto reduction = [&]() { + auto const [point_idxs, poly_idxs, distances] = [&]() { auto num_poly_quad_pairs = poly_quad_pairs.num_rows(); auto poly_indices = poly_quad_pairs.column(0).begin(); auto quad_lengths = thrust::make_permutation_iterator( @@ -267,7 +264,7 @@ struct compute_quadtree_point_to_nearest_polyline { rmm::device_uvector distances(point_x.size(), stream); // Fill distances with 0 - thrust::fill(rmm::exec_policy(stream), distances.begin(), distances.end(), T{0}); + CUDA_TRY(cudaMemsetAsync(distances.data(), T{0}, distances.size(), stream.value())); // Reduce the intermediate point/polyline indices to lists of point/polyline index pairs and // distances, selecting the polyline index closest to each point. @@ -317,10 +314,6 @@ struct compute_quadtree_point_to_nearest_polyline { return std::make_tuple(std::move(point_idxs), std::move(poly_idxs), std::move(distances)); }(); - auto const &point_idxs = std::get<0>(reduction); - auto const &poly_idxs = std::get<1>(reduction); - auto const &distances = std::get<2>(reduction); - // Allocate output columns for the point and polyline index pairs and their distances auto point_index_col = make_fixed_width_column(point_x.size(), stream, mr); auto poly_index_col = make_fixed_width_column(point_x.size(), stream, mr); From 2fb630e927e6d12a321e7e25a7bc74e5527bab11 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 5 May 2021 23:21:02 -0500 Subject: [PATCH 17/22] avoid the copies incurred by shrinking the device_uvectors --- .../quadtree_point_to_nearest_polyline.cu | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/cpp/src/join/quadtree_point_to_nearest_polyline.cu b/cpp/src/join/quadtree_point_to_nearest_polyline.cu index 6950f6018..739e29b23 100644 --- a/cpp/src/join/quadtree_point_to_nearest_polyline.cu +++ b/cpp/src/join/quadtree_point_to_nearest_polyline.cu @@ -160,8 +160,8 @@ struct compute_quadtree_point_to_nearest_polyline { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { - // Wrapped in an IIFE so intermediate allocations are freed on return - auto const [point_idxs, poly_idxs, distances] = [&]() { + // Wrapped in an IIFE so `local_point_offsets` is freed on return + auto const [point_idxs, poly_idxs, distances, num_distances] = [&]() { auto num_poly_quad_pairs = poly_quad_pairs.num_rows(); auto poly_indices = poly_quad_pairs.column(0).begin(); auto quad_lengths = thrust::make_permutation_iterator( @@ -302,16 +302,8 @@ struct compute_quadtree_point_to_nearest_polyline { }) .first); - point_idxs.resize(num_distances, stream); - point_idxs.shrink_to_fit(stream); - - poly_idxs.resize(num_distances, stream); - poly_idxs.shrink_to_fit(stream); - - distances.resize(num_distances, stream); - distances.shrink_to_fit(stream); - - return std::make_tuple(std::move(point_idxs), std::move(poly_idxs), std::move(distances)); + return std::make_tuple( + std::move(point_idxs), std::move(poly_idxs), std::move(distances), num_distances); }(); // Allocate output columns for the point and polyline index pairs and their distances @@ -319,15 +311,20 @@ struct compute_quadtree_point_to_nearest_polyline { auto poly_index_col = make_fixed_width_column(point_x.size(), stream, mr); auto distance_col = make_fixed_width_column(point_x.size(), stream, mr); + // Note: no need to resize `point_idxs`, `poly_idxs`, or `distances` if we set the end iterator + // to `point_poly_idxs_and_distances + num_distances`. + + auto point_poly_idxs_and_distances = + thrust::make_zip_iterator(point_idxs.begin(), poly_idxs.begin(), distances.begin()); + // scatter the values from their positions after reduction into their output positions - thrust::scatter( - rmm::exec_policy(stream), - thrust::make_zip_iterator(point_idxs.begin(), poly_idxs.begin(), distances.begin()), - thrust::make_zip_iterator(point_idxs.end(), poly_idxs.end(), distances.end()), - point_idxs.begin(), - thrust::make_zip_iterator(point_index_col->mutable_view().begin(), - poly_index_col->mutable_view().begin(), - distance_col->mutable_view().template begin())); + thrust::scatter(rmm::exec_policy(stream), + point_poly_idxs_and_distances, + point_poly_idxs_and_distances + num_distances, + point_idxs.begin(), + thrust::make_zip_iterator(point_index_col->mutable_view().begin(), + poly_index_col->mutable_view().begin(), + distance_col->mutable_view().template begin())); std::vector> cols{}; cols.reserve(3); From ced903bda102bab2d7fa19fc80f6890f85e4f67a Mon Sep 17 00:00:00 2001 From: ptaylor Date: Wed, 5 May 2021 23:55:58 -0500 Subject: [PATCH 18/22] remove dead code --- cpp/src/join/quadtree_point_in_polygon.cu | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/src/join/quadtree_point_in_polygon.cu b/cpp/src/join/quadtree_point_in_polygon.cu index 55e3207ce..8b575bec5 100644 --- a/cpp/src/join/quadtree_point_in_polygon.cu +++ b/cpp/src/join/quadtree_point_in_polygon.cu @@ -174,8 +174,6 @@ struct compute_quadtree_point_in_polygon { return std::make_tuple(std::move(poly_idxs), std::move(point_idxs), num_total_points); }(); - // auto &[poly_idxs, point_idxs, num_total_points] = poly_and_point_idxs; - auto poly_and_point_indices = thrust::make_zip_iterator(poly_idxs.begin(), point_idxs.begin()); // Enumerate the point X/Ys using the sorted `point_indices` (from quadtree construction) From f523461478e3bd8e526b32a832087381fec2d1f6 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 6 May 2021 00:14:34 -0500 Subject: [PATCH 19/22] call cudaMemsetAsync with an int instead of a float/double --- cpp/src/join/quadtree_point_to_nearest_polyline.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/join/quadtree_point_to_nearest_polyline.cu b/cpp/src/join/quadtree_point_to_nearest_polyline.cu index 739e29b23..32b441b4a 100644 --- a/cpp/src/join/quadtree_point_to_nearest_polyline.cu +++ b/cpp/src/join/quadtree_point_to_nearest_polyline.cu @@ -264,7 +264,7 @@ struct compute_quadtree_point_to_nearest_polyline { rmm::device_uvector distances(point_x.size(), stream); // Fill distances with 0 - CUDA_TRY(cudaMemsetAsync(distances.data(), T{0}, distances.size(), stream.value())); + CUDA_TRY(cudaMemsetAsync(distances.data(), 0, distances.size(), stream.value())); // Reduce the intermediate point/polyline indices to lists of point/polyline index pairs and // distances, selecting the polyline index closest to each point. From 1bc89b4eab9d7033b6177097b45981939e794865 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Tue, 11 May 2021 21:14:03 -0500 Subject: [PATCH 20/22] factor common code into point.cuh --- cpp/src/join/detail/point.cuh | 44 +++++++++++++++++++ cpp/src/join/quadtree_point_in_polygon.cu | 27 +++++------- .../quadtree_point_to_nearest_polyline.cu | 26 +++-------- 3 files changed, 60 insertions(+), 37 deletions(-) create mode 100644 cpp/src/join/detail/point.cuh diff --git a/cpp/src/join/detail/point.cuh b/cpp/src/join/detail/point.cuh new file mode 100644 index 000000000..199e9fe3d --- /dev/null +++ b/cpp/src/join/detail/point.cuh @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace cuspatial { +namespace detail { + +inline __device__ std::pair get_quad_poly_and_local_point_indices( + uint32_t const global_index, uint32_t const *point_offsets, uint32_t const *point_offsets_end) +{ + // Calculate the position in "point_offsets" that `global_index` falls between. + // This position is the index of the poly/quad pair for this `global_index`. + // + // Dereferencing `local_point_offset` yields the zero-based first point position of this + // quadrant. Adding this zero-based position to the quadrant's first point position in the + // quadtree yields the "global" position in the `point_indices` map. + auto const local_point_offset = + thrust::upper_bound(thrust::seq, point_offsets, point_offsets_end, global_index) - 1; + return std::make_pair( + // quad_poly_index + thrust::distance(point_offsets, local_point_offset), + // local_point_index + global_index - *local_point_offset); +} + +} // namespace detail +} // namespace cuspatial diff --git a/cpp/src/join/quadtree_point_in_polygon.cu b/cpp/src/join/quadtree_point_in_polygon.cu index 8b575bec5..138a515d0 100644 --- a/cpp/src/join/quadtree_point_in_polygon.cu +++ b/cpp/src/join/quadtree_point_in_polygon.cu @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "detail/point.cuh" + #include #include @@ -45,23 +47,16 @@ namespace { template struct compute_poly_and_point_indices { QuadOffsetsIter quad_point_offsets; - uint32_t const *local_point_offsets; - size_t const num_local_point_offsets; + uint32_t const *point_offsets; + uint32_t const *point_offsets_end; cudf::column_device_view const poly_indices; - thrust::tuple __device__ operator()(cudf::size_type const i) + thrust::tuple __device__ operator()(cudf::size_type const global_index) { - // Calculate the position in "local_point_offsets" that `i` falls between. - // This position is the index of the poly/quad pair for this `i`. - // - // Dereferencing `local_point_offset` yields the zero-based first point position of this - // quadrant. Adding this zero-based position to the quadrant's first point position in the - // quadtree yields the "global" position in the `point_indices` map. - auto po_begin = local_point_offsets; - auto po_end = local_point_offsets + num_local_point_offsets; - auto const local_point_offset = thrust::upper_bound(thrust::seq, po_begin, po_end, i) - 1; - uint32_t const pairs_idx = thrust::distance(local_point_offsets, local_point_offset); - uint32_t const point_idx = quad_point_offsets[pairs_idx] + (i - *local_point_offset); - uint32_t const poly_idx = poly_indices.element(pairs_idx); + // uint32_t quad_poly_index, local_point_index; + auto const [quad_poly_index, local_point_index] = + get_quad_poly_and_local_point_indices(global_index, point_offsets, point_offsets_end); + uint32_t const point_idx = quad_point_offsets[quad_poly_index] + local_point_index; + uint32_t const poly_idx = poly_indices.element(quad_poly_index); return thrust::make_tuple(poly_idx, point_idx); } }; @@ -168,7 +163,7 @@ struct compute_quadtree_point_in_polygon { compute_poly_and_point_indices{ quad_offsets_iter, local_point_offsets.begin(), - local_point_offsets.size() - 1, + local_point_offsets.end(), *cudf::column_device_view::create(poly_indices, stream)}); return std::make_tuple(std::move(poly_idxs), std::move(point_idxs), num_total_points); diff --git a/cpp/src/join/quadtree_point_to_nearest_polyline.cu b/cpp/src/join/quadtree_point_to_nearest_polyline.cu index 32b441b4a..a17aa2681 100644 --- a/cpp/src/join/quadtree_point_to_nearest_polyline.cu +++ b/cpp/src/join/quadtree_point_to_nearest_polyline.cu @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "detail/point.cuh" + #include #include @@ -45,24 +47,6 @@ namespace cuspatial { namespace detail { namespace { -inline __device__ std::pair get_quad_poly_and_local_point_indices( - uint32_t const global_index, uint32_t const *point_offsets, uint32_t const *point_offsets_end) -{ - // Calculate the position in "point_offsets" that `global_index` falls between. - // This position is the index of the poly/quad pair for this `global_index`. - // - // Dereferencing `local_point_offset` yields the zero-based first point position of this - // quadrant. Adding this zero-based position to the quadrant's first point position in the - // quadtree yields the "global" position in the `point_indices` map. - auto const local_point_offset = - thrust::upper_bound(thrust::seq, point_offsets, point_offsets_end, global_index) - 1; - return std::make_pair( - // quad_poly_index - thrust::distance(point_offsets, local_point_offset), - // local_point_index - global_index - *local_point_offset); -} - template inline __device__ std::pair get_local_poly_index_and_count( uint32_t const poly_index, QuadOffsetsIter quad_offsets, QuadOffsetsIter quad_offsets_end) @@ -91,8 +75,8 @@ inline __device__ std::pair get_transposed_point_and_pair_in QuadOffsetsIter quad_offsets_end, QuadLengthsIter quad_lengths) { - // uint32_t quad_poly_index, local_point_offset; - auto const [quad_poly_index, local_point_offset] = + // uint32_t quad_poly_index, local_point_index; + auto const [quad_poly_index, local_point_index] = get_quad_poly_and_local_point_indices(global_index, point_offsets, point_offsets_end); // uint32_t local_poly_index, num_polys_in_quad; @@ -103,7 +87,7 @@ inline __device__ std::pair get_transposed_point_and_pair_in auto const num_points_in_quad = quad_lengths[quad_poly_index]; auto const quad_poly_offset = quad_poly_index - local_poly_index; auto const quad_poly_point_start = local_poly_index * num_points_in_quad; - auto const transposed_point_start = quad_poly_point_start + local_point_offset; + auto const transposed_point_start = quad_poly_point_start + local_point_index; return std::make_pair( // transposed point index From 82f056965f181882f20ad5ce0d1a95e954799186 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Tue, 18 May 2021 07:03:51 -0500 Subject: [PATCH 21/22] Apply suggestions from code review Co-authored-by: Mark Harris --- cpp/src/join/quadtree_point_to_nearest_polyline.cu | 2 +- cpp/src/utility/point_to_nearest_polyline.cuh | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/cpp/src/join/quadtree_point_to_nearest_polyline.cu b/cpp/src/join/quadtree_point_to_nearest_polyline.cu index a17aa2681..aa8102fb6 100644 --- a/cpp/src/join/quadtree_point_to_nearest_polyline.cu +++ b/cpp/src/join/quadtree_point_to_nearest_polyline.cu @@ -248,7 +248,7 @@ struct compute_quadtree_point_to_nearest_polyline { rmm::device_uvector distances(point_x.size(), stream); // Fill distances with 0 - CUDA_TRY(cudaMemsetAsync(distances.data(), 0, distances.size(), stream.value())); + CUDA_TRY(cudaMemsetAsync(distances.data(), 0, distances.size() * sizeof(T), stream.value())); // Reduce the intermediate point/polyline indices to lists of point/polyline index pairs and // distances, selecting the polyline index closest to each point. diff --git a/cpp/src/utility/point_to_nearest_polyline.cuh b/cpp/src/utility/point_to_nearest_polyline.cuh index fc9de799c..dedba38d2 100644 --- a/cpp/src/utility/point_to_nearest_polyline.cuh +++ b/cpp/src/utility/point_to_nearest_polyline.cuh @@ -42,9 +42,12 @@ inline __device__ T point_to_poly_line_distance(T const px, auto const y0 = poly_points_y.element(i0); auto const x1 = poly_points_x.element(i1); auto const y1 = poly_points_y.element(i1); - auto const dx0 = px - x0, dy0 = py - y0; - auto const dx1 = px - x1, dy1 = py - y1; - auto const dx2 = x1 - x0, dy2 = y1 - y0; + auto const dx0 = px - x0; + auto const dy0 = py - y0; + auto const dx1 = px - x1; + auto const dy1 = py - y1; + auto const dx2 = x1 - x0; + auto const dy2 = y1 - y0; auto const d0 = dx0 * dx0 + dy0 * dy0; auto const d1 = dx1 * dx1 + dy1 * dy1; auto const d2 = dx2 * dx2 + dy2 * dy2; From c663e5ed4625850234714273580ac685b69ff992 Mon Sep 17 00:00:00 2001 From: ptaylor Date: Tue, 18 May 2021 08:55:28 -0500 Subject: [PATCH 22/22] fix lint --- cpp/src/utility/point_to_nearest_polyline.cuh | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/cpp/src/utility/point_to_nearest_polyline.cuh b/cpp/src/utility/point_to_nearest_polyline.cuh index dedba38d2..269d1f696 100644 --- a/cpp/src/utility/point_to_nearest_polyline.cuh +++ b/cpp/src/utility/point_to_nearest_polyline.cuh @@ -36,18 +36,18 @@ inline __device__ T point_to_poly_line_distance(T const px, : poly_points_x.size(); auto ring_len = ring_end - ring_begin; for (auto point_idx = 0u; point_idx < ring_len; ++point_idx) { - auto const i0 = ring_begin + ((point_idx + 0) % ring_len); - auto const i1 = ring_begin + ((point_idx + 1) % ring_len); - auto const x0 = poly_points_x.element(i0); - auto const y0 = poly_points_y.element(i0); - auto const x1 = poly_points_x.element(i1); - auto const y1 = poly_points_y.element(i1); - auto const dx0 = px - x0; - auto const dy0 = py - y0; - auto const dx1 = px - x1; - auto const dy1 = py - y1; - auto const dx2 = x1 - x0; - auto const dy2 = y1 - y0; + auto const i0 = ring_begin + ((point_idx + 0) % ring_len); + auto const i1 = ring_begin + ((point_idx + 1) % ring_len); + auto const x0 = poly_points_x.element(i0); + auto const y0 = poly_points_y.element(i0); + auto const x1 = poly_points_x.element(i1); + auto const y1 = poly_points_y.element(i1); + auto const dx0 = px - x0; + auto const dy0 = py - y0; + auto const dx1 = px - x1; + auto const dy1 = py - y1; + auto const dx2 = x1 - x0; + auto const dy2 = y1 - y0; auto const d0 = dx0 * dx0 + dy0 * dy0; auto const d1 = dx1 * dx1 + dy1 * dy1; auto const d2 = dx2 * dx2 + dy2 * dy2;