From 366183c857f104a8b5a2410f9c1437ada264a296 Mon Sep 17 00:00:00 2001
From: Michael Wang
Date: Fri, 6 Oct 2023 04:42:47 +0800
Subject: [PATCH] Benchmark `GeoSeries.distance` (#1277)

closes #994

There are preliminary benchmark results for `GeoSeries.distance` in #1231. This PR plans to add more benchmark coverage:

TODO:
- [x] point-point
- [x] point-linestring
- [x] point-polygon
- [x] linestring-linestring
- [x] linestring-polygon
- [x] polygon-polygon
- [x] Geometry complexity dimension
- [x] Geometry spatial relationship dimension
- [ ] Write the blog

Authors:
  - Michael Wang (https://github.com/isVoid)

Approvers:
  - Mark Harris (https://github.com/harrism)

URL: https://github.com/rapidsai/cuspatial/pull/1277
---
 python/cuspatial/benchmarks/api/bench_api.py | 157 ++++++++++++++++++-
 python/cuspatial/benchmarks/conftest.py      | 106 ++++++++++++-
 2 files changed, 259 insertions(+), 4 deletions(-)

diff --git a/python/cuspatial/benchmarks/api/bench_api.py b/python/cuspatial/benchmarks/api/bench_api.py
index ccc899f14..d93e45ebb 100644
--- a/python/cuspatial/benchmarks/api/bench_api.py
+++ b/python/cuspatial/benchmarks/api/bench_api.py
@@ -1,7 +1,7 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.
-
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
 import cupy
 import geopandas
+import pytest
 
 import cudf
 
@@ -125,7 +125,7 @@ def bench_haversine_distance(benchmark, gpu_dataframe):
     benchmark(cuspatial.haversine_distance, points_first, points_second)
 
 
-def bench_pairwise_linestring_distance(benchmark, gpu_dataframe):
+def bench_distance_pairwise_linestring(benchmark, gpu_dataframe):
     geometry = gpu_dataframe["geometry"]
     benchmark(
         cuspatial.pairwise_linestring_distance,
@@ -293,3 +293,154 @@ def bench_point_in_polygon(benchmark, polygons):
     short_dataframe = polygons.iloc[0:31]
     geometry = short_dataframe["geometry"]
     benchmark(cuspatial.point_in_polygon, points, geometry)
+
+
+# GeoSeries.distance benchmarking.
+#
+# One operand in each benchmark is given a reversed index, so ``align=True``
+# has to match rows by label while ``align=False`` pairs them by position.
+
+
+@pytest.mark.parametrize("align", [True, False])
+@pytest.mark.parametrize("n", [1e3, 1e4, 1e5, 1e6, 1e7])
+@pytest.mark.parametrize("lib", ["cuspatial", "geopandas"])
+def bench_distance_point(benchmark, lib, point_generator_device, n, align):
+    """Benchmark point-to-point ``GeoSeries.distance``."""
+    points = point_generator_device(int(n))
+    other_points = point_generator_device(int(n))
+    index = cudf.Index(cupy.arange(len(other_points) - 1, -1, -1))
+
+    if lib == "geopandas":
+        points = points.to_geopandas()
+        other_points = other_points.to_geopandas()
+        index = index.to_pandas()
+
+    other_points.index = index
+    benchmark(points.distance, other_points, align)
+
+
+@pytest.mark.parametrize("align", [True, False])
+@pytest.mark.parametrize("n", [1e3, 1e4, 1e5, 1e6, 1e7])
+@pytest.mark.parametrize("lib", ["cuspatial", "geopandas"])
+def bench_distance_point_linestring(
+    benchmark,
+    point_generator_device,
+    linestring_generator_device,
+    lib,
+    n,
+    align,
+):
+    """Benchmark point-to-linestring ``GeoSeries.distance``."""
+    points = point_generator_device(int(n))
+    linestrings = linestring_generator_device(int(n), 20)
+    index = cudf.Index(cupy.arange(len(linestrings) - 1, -1, -1))
+
+    if lib == "geopandas":
+        points = points.to_geopandas()
+        linestrings = linestrings.to_geopandas()
+        index = index.to_pandas()
+
+    linestrings.index = index
+    benchmark(points.distance, linestrings, align)
+
+
+@pytest.mark.parametrize("align", [True, False])
+@pytest.mark.parametrize("n", [1e3, 1e4, 1e5, 1e6, 1e7])
+@pytest.mark.parametrize("lib", ["cuspatial", "geopandas"])
+def bench_distance_point_polygon(
+    benchmark, point_generator_device, polygon_generator_device, lib, n, align
+):
+    """Benchmark point-to-polygon ``GeoSeries.distance``."""
+    points = point_generator_device(int(n))
+    polygons = polygon_generator_device(int(n), 38)
+    index = cudf.Index(cupy.arange(len(polygons) - 1, -1, -1))
+
+    if lib == "geopandas":
+        points = points.to_geopandas()
+        polygons = polygons.to_geopandas()
+        index = index.to_pandas()
+
+    polygons.index = index
+    benchmark(points.distance, polygons, align)
+
+
+@pytest.mark.parametrize("align", [True, False])
+@pytest.mark.parametrize("n", [1e3, 1e4, 1e5, 1e6, 1e7])
+@pytest.mark.parametrize("lib", ["cuspatial", "geopandas"])
+def bench_distance_linestring_linestring(
+    benchmark, linestring_generator_device, lib, n, align
+):
+    """Benchmark linestring-to-linestring ``GeoSeries.distance``."""
+    lines1 = linestring_generator_device(int(n), 20)
+    lines2 = linestring_generator_device(int(n), 20)
+    index = cudf.Index(cupy.arange(len(lines1) - 1, -1, -1))
+
+    if lib == "geopandas":
+        lines1 = lines1.to_geopandas()
+        lines2 = lines2.to_geopandas()
+        index = index.to_pandas()
+
+    lines1.index = index
+    benchmark(lines1.distance, lines2, align)
+
+
+@pytest.mark.parametrize("align", [True, False])
+@pytest.mark.parametrize("n", [1e3, 1e4, 1e5, 1e6, 1e7])
+@pytest.mark.parametrize("lib", ["cuspatial", "geopandas"])
+@pytest.mark.parametrize(
+    "num_segments, num_sides", [(5, 5), (20, 38), (100, 100), (1000, 1000)]
+)
+def bench_distance_linestring_polygon(
+    benchmark,
+    lib,
+    linestring_generator_device,
+    polygon_generator_device,
+    n,
+    align,
+    num_segments,
+    num_sides,
+):
+    """Benchmark linestring-polygon distance over varying geometry sizes."""
+    lines = linestring_generator_device(int(n), num_segments)
+    polygons = polygon_generator_device(int(n), num_sides)
+    index = cudf.Index(cupy.arange(len(lines) - 1, -1, -1))
+
+    if lib == "geopandas":
+        lines = lines.to_geopandas()
+        polygons = polygons.to_geopandas()
+        index = index.to_pandas()
+
+    lines.index = index
+    benchmark(lines.distance, polygons, align)
+
+
+@pytest.mark.parametrize("align", [True, False])
+@pytest.mark.parametrize("n", [1e3, 1e4, 1e5, 1e6, 1e7])
+@pytest.mark.parametrize("lib", ["cuspatial", "geopandas"])
+@pytest.mark.parametrize("intersects", [True, False])
+def bench_distance_polygon(
+    benchmark, lib, polygon_generator_device, n, align, intersects
+):
+    """Benchmark polygon-to-polygon ``GeoSeries.distance``.
+
+    With ``intersects=True`` each radius-0.5 polygon is nested inside its
+    radius-1.0 partner; otherwise the smaller polygons are centered at
+    (3, 0) so that every pair is disjoint.
+    """
+    # Circumradii 1.0 and 0.5 leave a gap of 1.5 when centers are 3.0 apart.
+    other_center = (0.0, 0.0) if intersects else (3.0, 0.0)
+    polygons1 = polygon_generator_device(
+        int(n), 38, radius=1.0, all_concentric=True
+    )
+    polygons2 = polygon_generator_device(
+        int(n), 38, radius=0.5, all_concentric=True, center=other_center
+    )
+    index = cudf.Index(cupy.arange(len(polygons1) - 1, -1, -1))
+
+    if lib == "geopandas":
+        polygons1 = polygons1.to_geopandas()
+        polygons2 = polygons2.to_geopandas()
+        index = index.to_pandas()
+
+    polygons1.index = index
+    benchmark(polygons1.distance, polygons2, align)
diff --git a/python/cuspatial/benchmarks/conftest.py b/python/cuspatial/benchmarks/conftest.py
index f7a475444..5459663b5 100644
--- a/python/cuspatial/benchmarks/conftest.py
+++ b/python/cuspatial/benchmarks/conftest.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
 
 """Defines pytest fixtures for all benchmarks.
 
@@ -11,7 +11,9 @@
 import geopandas as gpd
 import numpy as np
 import pandas as pd
+import pytest
 import pytest_cases
+from numba import cuda
 from shapely.geometry import (
     LineString,
     MultiLineString,
@@ -176,3 +178,105 @@ def shapefile(tmp_path, gpdf_100):
     p = d / "read_polygon_shapefile"
     gpdf_100.to_file(p)
     return p
+
+
+@pytest.fixture()
+def point_generator_device():
+    """Return a factory building a GeoSeries of ``n`` random points."""
+
+    def generator(n):
+        # Interleaved x/y coordinates drawn uniformly from [0, 1).
+        coords = cp.random.random(n * 2, dtype="f8")
+        return cuspatial.GeoSeries.from_points_xy(coords)
+
+    return generator
+
+
+# Numba kernel to generate a closed ring for each polygon
+@cuda.jit
+def generate_polygon_coordinates(
+    coordinate_array, centroids, radius, num_vertices
+):
+    """Compute one interleaved x/y value of a polygon ring per thread.
+
+    Each polygon occupies ``num_vertices + 1`` consecutive points (a closed
+    ring) centered at its row of ``centroids``.
+    """
+    i = cuda.grid(1)
+    if i >= coordinate_array.size:
+        return
+
+    point_idx = i // 2
+    geometry_idx = point_idx // (num_vertices + 1)
+
+    # The closing point reuses vertex 0; an angle of 2*pi would not reproduce
+    # the first point bit-for-bit and would leave the ring open.
+    intra_point_idx = (point_idx % (num_vertices + 1)) % num_vertices
+
+    centroid = centroids[geometry_idx]
+    angle = 2 * np.pi * intra_point_idx / num_vertices
+
+    if i % 2 == 0:
+        coordinate_array[i] = centroid[0] + radius * np.cos(angle)
+    else:
+        coordinate_array[i] = centroid[1] + radius * np.sin(angle)
+
+
+@pytest.fixture()
+def polygon_generator_device():
+    """Return a factory building a GeoSeries of ``n`` regular polygons.
+
+    Vertices lie on a circle of ``radius`` around each centroid. Centroids
+    are random in the unit square unless ``all_concentric`` is true, in
+    which case every polygon is centered at ``center``.
+    """
+
+    def generator(
+        n, num_vertices, radius=1.0, all_concentric=False, center=(0.0, 0.0)
+    ):
+        geometry_offsets = cp.arange(n + 1)
+        part_offsets = cp.arange(n + 1)
+
+        # Each polygon has a closed ring, so we need to add an extra point
+        ring_offsets = cp.arange(
+            (n + 1) * (num_vertices + 1), step=(num_vertices + 1)
+        )
+        num_points = int(ring_offsets[-1].get())
+
+        if not all_concentric:
+            centroids = cp.random.random((n, 2))
+        else:
+            centroids = cp.tile(cp.asarray(center, dtype="f8"), (n, 1))
+        # Every element is written by the kernel, so no initialization needed.
+        coords = cp.empty((num_points * 2,), dtype="f8")
+        generate_polygon_coordinates.forall(len(coords))(
+            coords, centroids, radius, num_vertices
+        )
+        return cuspatial.GeoSeries.from_polygons_xy(
+            coords, ring_offsets, part_offsets, geometry_offsets
+        )
+
+    return generator
+
+
+@pytest.fixture()
+def linestring_generator_device(polygon_generator_device):
+    """Return a factory building a GeoSeries of ``n`` linestrings.
+
+    The rings of polygons from ``polygon_generator_device`` are reused as
+    linestrings, so that the vertices of each generated linestring stay
+    close together.
+    """
+
+    def generator(n, segment_per_linestring):
+        polygons = polygon_generator_device(
+            n, segment_per_linestring, all_concentric=False
+        )
+
+        return cuspatial.GeoSeries.from_linestrings_xy(
+            polygons.polygons.xy,
+            polygons.polygons.ring_offset,
+            polygons.polygons.geometry_offset,
+        )
+
+    return generator