Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactors filtering.py, indexing.py to Accept GeoSeries #938

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 19 additions & 8 deletions python/cuspatial/benchmarks/api/bench_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,28 +121,35 @@ def bench_points_in_spatial_window(benchmark, gpu_dataframe):
geometry = gpu_dataframe["geometry"]
mean_x, std_x = (geometry.polygons.x.mean(), geometry.polygons.x.std())
mean_y, std_y = (geometry.polygons.y.mean(), geometry.polygons.y.std())
points = cuspatial.GeoSeries.from_points_xy(
cudf.DataFrame(
{"x": geometry.polygons.x, "y": geometry.polygons.y}
).interleave_columns()
)
benchmark(
cuspatial.points_in_spatial_window,
points,
mean_x - std_x,
mean_x + std_x,
mean_y - std_y,
mean_y + std_y,
geometry.polygons.x,
geometry.polygons.y,
)


def bench_quadtree_on_points(benchmark, gpu_dataframe):
polygons = gpu_dataframe["geometry"].polygons
x_points = (cupy.random.random(10000000) - 0.5) * 360
y_points = (cupy.random.random(10000000) - 0.5) * 180
points = cuspatial.GeoSeries.from_points_xy(
cudf.DataFrame({"x": x_points, "y": y_points}).interleave_columns()
)

scale = 5
max_depth = 7
min_size = 125
benchmark(
cuspatial.quadtree_on_points,
x_points,
y_points,
points,
polygons.x.min(),
polygons.x.max(),
polygons.y.min(),
Expand All @@ -160,9 +167,11 @@ def bench_quadtree_point_in_polygon(benchmark, polygons):
scale = 5
max_depth = 7
min_size = 125
points = cuspatial.GeoSeries.from_points_xy(
cudf.DataFrame({"x": x_points, "y": y_points}).interleave_columns()
)
point_indices, quadtree = cuspatial.quadtree_on_points(
x_points,
y_points,
points,
polygons.x.min(),
polygons.x.max(),
polygons.y.min(),
Expand Down Expand Up @@ -215,9 +224,11 @@ def bench_quadtree_point_to_nearest_linestring(benchmark):
polygons = gpu_countries["geometry"].polygons
points_x = gpu_cities["geometry"].points.x
points_y = gpu_cities["geometry"].points.y
points = cuspatial.GeoSeries.from_points_xy(
cudf.DataFrame({"x": points_x, "y": points_y}).interleave_columns()
)
point_indices, quadtree = cuspatial.quadtree_on_points(
points_x,
points_y,
points,
polygons.x.min(),
polygons.x.max(),
polygons.y.min(),
Expand Down
41 changes: 25 additions & 16 deletions python/cuspatial/cuspatial/core/spatial/filtering.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.

from cudf import DataFrame
from cudf.core.column import as_column

from cuspatial._lib import spatial_window
from cuspatial.utils.column_utils import normalize_point_columns
from cuspatial.core.geoseries import GeoSeries
from cuspatial.utils.column_utils import contains_only_points


def points_in_spatial_window(min_x, max_x, min_y, max_y, xs, ys):
def points_in_spatial_window(points: GeoSeries, min_x, max_x, min_y, max_y):
"""Return only the subset of coordinates that fall within a
rectangular window.

Expand All @@ -19,32 +20,40 @@ def points_in_spatial_window(min_x, max_x, min_y, max_y, xs, ys):

Parameters
----------
min_x
points: GeoSeries
A geoseries of points
min_x: float
lower x-coordinate of the query window
max_x
max_x: float
upper x-coordinate of the query window
min_y
min_y: float
lower y-coordinate of the query window
max_y
max_y: float
upper y-coordinate of the query window
xs
column of x-coordinates that may fall within the window
ys
column of y-coordinates that may fall within the window

Returns
-------
result : cudf.DataFrame
subset of `(x, y)` pairs above that fall within the window
result : GeoSeries
subset of `points` above that fall within the window

Notes
-----
* Swaps ``min_x`` and ``max_x`` if ``min_x > max_x``
* Swaps ``min_y`` and ``max_y`` if ``min_y > max_y``
"""
xs, ys = normalize_point_columns(as_column(xs), as_column(ys))
return DataFrame._from_data(

if len(points) == 0:
return GeoSeries([])

if not contains_only_points(points):
raise ValueError("GeoSeries must contain only points.")

xs = as_column(points.points.x)
ys = as_column(points.points.y)

res_xy = DataFrame._from_data(
*spatial_window.points_in_spatial_window(
min_x, max_x, min_y, max_y, xs, ys
)
)
).interleave_columns()
return GeoSeries.from_points_xy(res_xy)
20 changes: 12 additions & 8 deletions python/cuspatial/cuspatial/core/spatial/indexing.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,28 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.

import warnings

from cudf import DataFrame, Series
from cudf.core.column import as_column

from cuspatial import GeoSeries
from cuspatial._lib.quadtree import (
quadtree_on_points as cpp_quadtree_on_points,
)
from cuspatial.utils.column_utils import normalize_point_columns
from cuspatial.utils.column_utils import contains_only_points


def quadtree_on_points(
xs, ys, x_min, x_max, y_min, y_max, scale, max_depth, max_size
points: GeoSeries, x_min, x_max, y_min, y_max, scale, max_depth, max_size
):
"""
Construct a quadtree from a set of points for a given area-of-interest
bounding box.

Parameters
----------
xs
Column of x-coordinates for each point.
ys
Column of y-coordinates for each point.
points
Series of points.
x_min
The lower-left x-coordinate of the area of interest bounding box.
x_max
Expand Down Expand Up @@ -157,7 +156,12 @@ def quadtree_on_points(
Length: 120, dtype: int32
"""

xs, ys = normalize_point_columns(as_column(xs), as_column(ys))
if not len(points) == 0 and not contains_only_points(points):
raise ValueError("GeoSeries must contain only points.")

xs = as_column(points.points.x)
ys = as_column(points.points.y)

x_min, x_max, y_min, y_max = (
min(x_min, x_max),
max(x_min, x_max),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,26 +1,25 @@
# Copyright (c) 2019, NVIDIA CORPORATION.
# Copyright (c) 2019-2023, NVIDIA CORPORATION.

import geopandas as gpd
import pytest

import cudf
from geopandas.testing import assert_geoseries_equal
from shapely.geometry import Point

import cuspatial


def test_zeros():
result = cuspatial.points_in_spatial_window( # noqa: F841
0, 0, 0, 0, cudf.Series([0.0]), cudf.Series([0.0])
cuspatial.GeoSeries([Point(0, 0)]), 0, 0, 0, 0
)
assert result.empty


def test_centered():
result = cuspatial.points_in_spatial_window(
-1, 1, -1, 1, cudf.Series([0.0]), cudf.Series([0.0])
)
cudf.testing.assert_frame_equal(
result, cudf.DataFrame({"x": [0.0], "y": [0.0]})
)
s = cuspatial.GeoSeries([Point(0, 0)])
result = cuspatial.points_in_spatial_window(s, -1, 1, -1, 1)

assert_geoseries_equal(result.to_geopandas(), gpd.GeoSeries([Point(0, 0)]))


@pytest.mark.parametrize(
Expand All @@ -29,38 +28,48 @@ def test_centered():
def test_corners(coords):
x, y = coords
result = cuspatial.points_in_spatial_window(
-1.1, 1.1, -1.1, 1.1, cudf.Series([x]), cudf.Series([y])
)
cudf.testing.assert_frame_equal(
result, cudf.DataFrame({"x": [x], "y": [y]})
cuspatial.GeoSeries([Point(x, y)]), -1.1, 1.1, -1.1, 1.1
)
assert_geoseries_equal(result.to_geopandas(), gpd.GeoSeries([Point(x, y)]))


def test_pair():
result = cuspatial.points_in_spatial_window(
-1.1, 1.1, -1.1, 1.1, cudf.Series([0.0, 1.0]), cudf.Series([1.0, 0.0])
cuspatial.GeoSeries([Point(0, 1), Point(1, 0)]), -1.1, 1.1, -1.1, 1.1
)
cudf.testing.assert_frame_equal(
result, cudf.DataFrame({"x": [0.0, 1.0], "y": [1.0, 0.0]})
assert_geoseries_equal(
result.to_geopandas(), gpd.GeoSeries([Point(0, 1), Point(1, 0)])
)


def test_oob():
result = cuspatial.points_in_spatial_window(
-1, 1, -1, 1, cudf.Series([-2.0, 2.0]), cudf.Series([2.0, -2.0])
cuspatial.GeoSeries([Point(-2.0, 2.0), Point(2.0, -2.0)]),
-1,
1,
-1,
1,
)
cudf.testing.assert_frame_equal(result, cudf.DataFrame({"x": [], "y": []}))
assert_geoseries_equal(result.to_geopandas(), gpd.GeoSeries([]))


def test_half():
result = cuspatial.points_in_spatial_window(
cuspatial.GeoSeries(
[
Point(-1.0, 1.0),
Point(1.0, -1.0),
Point(3.0, 3.0),
Point(-3.0, -3.0),
]
),
-2,
2,
-2,
2,
cudf.Series([-1.0, 1.0, 3.0, -3.0]),
cudf.Series([1.0, -1.0, 3.0, -3.0]),
)
cudf.testing.assert_frame_equal(
result, cudf.DataFrame({"x": [-1.0, 1.0], "y": [1.0, -1.0]})

assert_geoseries_equal(
result.to_geopandas(),
gpd.GeoSeries([Point(-1.0, 1.0), Point(1.0, -1.0)]),
)
Loading