diff --git a/conda/environments/cuspatial_dev_cuda11.0.yml b/conda/environments/cuspatial_dev_cuda11.0.yml index bc8cbed52..034895400 100644 --- a/conda/environments/cuspatial_dev_cuda11.0.yml +++ b/conda/environments/cuspatial_dev_cuda11.0.yml @@ -15,3 +15,4 @@ dependencies: - cython>=0.29,<0.30 - gtest=1.10.0 - gmock=1.10.0 + - pydata-sphinx-theme diff --git a/conda/environments/cuspatial_dev_cuda11.1.yml b/conda/environments/cuspatial_dev_cuda11.1.yml index 2e5d1cd99..cf3440c51 100644 --- a/conda/environments/cuspatial_dev_cuda11.1.yml +++ b/conda/environments/cuspatial_dev_cuda11.1.yml @@ -15,3 +15,4 @@ dependencies: - cython>=0.29,<0.30 - gtest=1.10.0 - gmock=1.10.0 + - pydata-sphinx-theme diff --git a/conda/environments/cuspatial_dev_cuda11.2.yml b/conda/environments/cuspatial_dev_cuda11.2.yml index 86c8d40ab..c10c8e281 100644 --- a/conda/environments/cuspatial_dev_cuda11.2.yml +++ b/conda/environments/cuspatial_dev_cuda11.2.yml @@ -15,3 +15,4 @@ dependencies: - cython>=0.29,<0.30 - gtest=1.10.0 - gmock=1.10.0 + - pydata-sphinx-theme diff --git a/docs/source/api.rst b/docs/source/api.rst deleted file mode 100644 index 26cb63535..000000000 --- a/docs/source/api.rst +++ /dev/null @@ -1,84 +0,0 @@ -GeoPandas Compatibility ------------------------ - -We support any geometry format supported by `GeoPandas`. Load geometry information from a `GeoPandas.GeoSeries` or `GeoPandas.GeoDataFrame`. - - >>> gpdf = geopandas.read_file('arbitrary.txt') - cugpdf = cuspatial.from_geopandas(gpdf) - -or - - >>> cugpdf = cuspatial.GeoDataFrame(gpdf) - -.. currentmodule:: cuspatial - -.. autoclass:: cuspatial.GeoDataFrame - :members: - :show-inheritance: -.. autoclass:: cuspatial.GeoSeries - :members: - :show-inheritance: -.. autoclass:: cuspatial.geometry.geocolumn.GeoColumn - :members: - :show-inheritance: -.. 
autoclass:: cuspatial.GeoArrowBuffers - :members: - :show-inheritance: - - -Spatial Indexing --------- - -Spatial indexing functions provide blisteringly-fast on-GPU point-in-polygon -operations. - -.. currentmodule:: cuspatial - -.. autofunction:: cuspatial.quadtree_point_in_polygon -.. autofunction:: cuspatial.quadtree_point_to_nearest_polyline -.. autofunction:: cuspatial.point_in_polygon -.. autofunction:: cuspatial.polygon_bounding_boxes -.. autofunction:: cuspatial.polyline_bounding_boxes -.. autofunction:: cuspatial.quadtree_on_points -.. autofunction:: cuspatial.join_quadtree_and_bounding_boxes -.. autofunction:: cuspatial.points_in_spatial_window - - -GIS ---- - -Two GIS functions make it easier to compute distances with geographic coordinates. - -.. currentmodule:: cuspatial - -.. autofunction:: cuspatial.haversine_distance -.. autofunction:: cuspatial.lonlat_to_cartesian - - -Trajectory ----------- - -Trajectory functions make it easy to identify and group trajectories from point data. - -.. currentmodule:: cuspatial - -.. autofunction:: cuspatial.derive_trajectories -.. autofunction:: cuspatial.trajectory_distances_and_speeds -.. autofunction:: cuspatial.directed_hausdorff_distance -.. autofunction:: cuspatial.trajectory_bounding_boxes -.. autoclass:: CubicSpline -.. automethod:: CubicSpline.__init__ -.. automethod:: CubicSpline.__call__ - - -IO --- - -cuSpatial offers native GPU-accelerated shapefile reading. In addition, any host-side GeoPandas DataFrame can be copied into GPU memory for use with cuSpatial -algorithms. - -.. currentmodule:: cuspatial - -.. autofunction:: cuspatial.read_polygon_shapefile -.. 
autofunction:: cuspatial.from_geopandas - diff --git a/docs/source/api_docs/geopandas_compatibility.rst b/docs/source/api_docs/geopandas_compatibility.rst new file mode 100644 index 000000000..946273cc5 --- /dev/null +++ b/docs/source/api_docs/geopandas_compatibility.rst @@ -0,0 +1,18 @@ +GeoPandas Compatibility +----------------------- + +cuSpatial supports any geometry format supported by `GeoPandas`. Load geometry information from a `GeoPandas.GeoSeries` or `GeoPandas.GeoDataFrame`. + + >>> gpdf = geopandas.read_file('arbitrary.txt') + cugpdf = cuspatial.from_geopandas(gpdf) + +or + + >>> cugpdf = cuspatial.GeoDataFrame(gpdf) + +.. currentmodule:: cuspatial + +.. autoclass:: cuspatial.GeoDataFrame + :members: +.. autoclass:: cuspatial.GeoSeries + :members: diff --git a/docs/source/api_docs/gis.rst b/docs/source/api_docs/gis.rst new file mode 100644 index 000000000..d709bc9c8 --- /dev/null +++ b/docs/source/api_docs/gis.rst @@ -0,0 +1,9 @@ +GIS +--- + +Functions for computing geographic coordinates. + +.. currentmodule:: cuspatial + +.. autofunction:: cuspatial.haversine_distance +.. autofunction:: cuspatial.lonlat_to_cartesian diff --git a/docs/source/api_docs/internals.rst b/docs/source/api_docs/internals.rst new file mode 100644 index 000000000..2e95acb67 --- /dev/null +++ b/docs/source/api_docs/internals.rst @@ -0,0 +1,113 @@ +Internals +--------- + +This page includes information to help users understand the internal +data structure of cuspatial. + +GeoArrow Format ++++++++++++++++ + +Geospatial data is context rich; aside from just a set of +numbers representing coordinates, they together represent certain geometry +that requires grouping. For example, given 5 points in a plane, +they could be 5 separate points, 2 line segments, a single linestring, +or a pentagon. Many geometry libraries store the points in +arrays of geometric objects, commonly known as "Array of Structures" (AoS). 
+AoS is not efficient for accelerated computing on parallel devices such +as GPUs. Therefore, the GeoArrow format was introduced to store geodata in a +densely packed format, commonly known as "Structure of Arrays" (SoA). + +The GeoArrow format specifies a tabular data format for geometry +information. Supported types include `Point`, `MultiPoint`, `LineString`, +`MultiLineString`, `Polygon`, and `MultiPolygon`. In order to store +these coordinate types in a strictly tabular fashion, columns are +created for Points, MultiPoints, LineStrings, and Polygons. +MultiLines and MultiPolygons are stored in the same data structure +as LineStrings and Polygons. + +GeoArrow format packs complex geometry types into 14 single-column Arrow +tables. See :func:`GeoArrowBuffers<cuspatial.GeoArrowBuffers>` docstring +for the complete list of keys for the columns. + +Examples +******** + +The `Point` geometry is the simplest. N points are stored in a length 2*N +buffer with interleaved x,y coordinates. An optional z buffer of length N +can be used. + +A `MultiPoint` is a group of points, and is the second simplest GeoArrow +geometry type. It is identical to points, with the addition of a +``multipoints_offsets`` buffer. The offsets buffer stores N+1 indices. The +first multipoint offset is specified by 0, which is always stored in +``offsets[0]``. The second offset is stored in ``offsets[1]``, and so on. +The number of points in multipoint ``i`` is the difference between +``offsets[i+1]`` and ``offsets[i]``. 
+ + +Consider:: + + buffers = GeoArrowBuffers({ + "multipoints_xy": + [0, 0, 0, 1, 0, 2, 1, 0, 1, 1, 1, 2, 2, 0, 2, 1, 2, 2], + "multipoints_offsets": + [0, 6, 12, 18] + }) + +which encodes the following GeoPandas Series:: + + series = geopandas.GeoSeries([ + MultiPoint((0, 0), (0, 1), (0, 2)), + MultiPoint((1, 0), (1, 1), (1, 2)), + MultiPoint((2, 0), (2, 1), (2, 2)), + ]) + +`LineString` geometry is more complicated than multipoints because the +format allows for the use of `LineString` and `MultiLineString` in the same +buffer, via the ``mlines`` buffer. The ``mlines`` buffer stores 2M indices, where M +is the number of `MultiLineString` s. The starting and ending **Linestring offset** of the `i` th +`MultiLineString` is stored at ``mlines[2*i]`` and ``mlines[2*i+1]`` respectively. + + +Consider:: + + buffers = GeoArrowBuffers({ + "lines_xy": + [0, 0, 0, 1, 0, 2, 1, 0, 1, 1, 1, 2, 2, 0, 2, 1, 2, 2, 3, 0, + 3, 1, 3, 2, 4, 0, 4, 1, 4, 2], + "lines_offsets": + [0, 6, 12, 18, 24, 30], + "mlines": + [1, 3] + }) + +which encodes a GeoPandas Series:: + + series = geopandas.GeoSeries([ + LineString((0, 0), (0, 1), (0, 2)), + MultiLineString([(1, 0), (1, 1), (1, 2)], + [(2, 0), (2, 1), (2, 2)], + ), + LineString((3, 0), (3, 1), (3, 2)), + LineString((4, 0), (4, 1), (4, 2)), + ]) + +Note that ``mlines`` has 2 entries, and therefore there is 1 +`MultiLineString` in ``buffers``. It consists of 2 +`LineStrings`: the second and third `LineString` defined by +``lines_offsets``. + + +Polygon geometry includes `mpolygons` for MultiPolygons similar to the +LineString geometry. Polygons are encoded using the same format as +`Shapefile <https://en.wikipedia.org/wiki/Shapefile>`_ , +with left-wound external rings and right-wound internal rings. + +GeoArrow Internal APIs +********************** + +.. autoclass:: cuspatial.GeoArrowBuffers + :members: +.. autoclass:: cuspatial.geometry.geocolumn.GeoMeta +.. 
autoclass:: cuspatial.geometry.geocolumn.GeoColumn + :members: diff --git a/docs/source/api_docs/io.rst b/docs/source/api_docs/io.rst new file mode 100644 index 000000000..0d80d6df9 --- /dev/null +++ b/docs/source/api_docs/io.rst @@ -0,0 +1,10 @@ +IO +-- + +cuSpatial offers native GPU-accelerated shapefile reading. In addition, any host-side GeoPandas DataFrame can be copied into GPU memory for use with cuSpatial +algorithms. + +.. currentmodule:: cuspatial + +.. autofunction:: cuspatial.read_polygon_shapefile +.. autofunction:: cuspatial.from_geopandas diff --git a/docs/source/api_docs/spatial_indexing.rst b/docs/source/api_docs/spatial_indexing.rst new file mode 100644 index 000000000..42a0de726 --- /dev/null +++ b/docs/source/api_docs/spatial_indexing.rst @@ -0,0 +1,15 @@ +Spatial Indexing +---------------- + +Spatial indexing functions provide GPU-accelerated point-in-polygon and spatial join operations. + +.. currentmodule:: cuspatial + +.. autofunction:: cuspatial.quadtree_point_in_polygon +.. autofunction:: cuspatial.quadtree_point_to_nearest_polyline +.. autofunction:: cuspatial.point_in_polygon +.. autofunction:: cuspatial.polygon_bounding_boxes +.. autofunction:: cuspatial.polyline_bounding_boxes +.. autofunction:: cuspatial.quadtree_on_points +.. autofunction:: cuspatial.join_quadtree_and_bounding_boxes +.. autofunction:: cuspatial.points_in_spatial_window diff --git a/docs/source/api_docs/trajectory.rst b/docs/source/api_docs/trajectory.rst new file mode 100644 index 000000000..2882eee53 --- /dev/null +++ b/docs/source/api_docs/trajectory.rst @@ -0,0 +1,14 @@ +Trajectory +---------- + +Functions for identifying and grouping trajectories from point data. + +.. currentmodule:: cuspatial + +.. autofunction:: cuspatial.derive_trajectories +.. autofunction:: cuspatial.trajectory_distances_and_speeds +.. autofunction:: cuspatial.directed_hausdorff_distance +.. autofunction:: cuspatial.trajectory_bounding_boxes +.. autoclass:: CubicSpline +.. 
automethod:: CubicSpline.__init__ +.. automethod:: CubicSpline.__call__ diff --git a/docs/source/conf.py b/docs/source/conf.py index 85ae77601..2f610c4e8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -62,7 +62,7 @@ # General information about the project. project = "cuspatial" -copyright = "2019, NVIDIA" +copyright = "2019-2022, NVIDIA" author = "NVIDIA" # The version info for the project you're documenting, acts as replacement for @@ -92,6 +92,13 @@ # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False +html_theme_options = { + "external_links": [], + "github_url": "https://github.com/rapidsai/cuspatial", + "twitter_url": "https://twitter.com/rapidsai", + "show_toc_level": 1, + "navbar_align": "right", +} # -- Options for HTML output ---------------------------------------------- @@ -99,20 +106,7 @@ # a list of builtin themes. # -html_theme = "sphinx_rtd_theme" - -# on_rtd is whether we are on readthedocs.org -on_rtd = os.environ.get("READTHEDOCS", None) == "True" - -if not on_rtd: - # only import and set the theme if we're building docs locally - # otherwise, readthedocs.org uses their theme by default, - # so no need to specify it - import sphinx_rtd_theme - - html_theme = "sphinx_rtd_theme" - html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] - +html_theme = "pydata_sphinx_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -189,8 +183,12 @@ # Example configuration for intersphinx: refer to the Python standard library. 
-intersphinx_mapping = {"https://docs.python.org/": None} - +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), + "geopandas": ("https://geopandas.readthedocs.io/en/latest/", None), + "cudf": ("https://docs.rapids.ai/api/cudf/stable/", None), +} # Config numpydoc numpydoc_show_inherited_class_members = False diff --git a/docs/source/index.rst b/docs/source/index.rst index ee58867d5..194d06967 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -15,26 +15,35 @@ geometries. GeoArrow -------- -cuSpatial proposes a new GeoArrow format from the fruit of discussions with the GeoPandas team. GeoArrow is a packed columnar data format for the six fundamental geometry types: Point, MultiPoint, Lines, MultiLines, Polygons, and MultiPolygons. MultiGeometry is a possibility that may be implemented in the future. GeoArrow uses packed coordinate and offset columns to define objects, which enables very-fast copy between CPU, GPU, and NIC. +cuSpatial proposes a new GeoArrow format from the fruit of discussions +with the GeoPandas team. GeoArrow is a packed columnar data format +for the six fundamental geometry types: +Point, MultiPoint, Lines, MultiLines, Polygons, and MultiPolygons. +MultiGeometry is a possibility that may be implemented in the future. +GeoArrow uses packed coordinate and offset columns to define objects, +which enables very fast copies between CPU, GPU, and NIC. -Any data source that is loaded into cuSpatial via :func:`cuspatial.from_geopandas` can then take advantage of `cudf`'s GPU-accelerated Arrow I/O routines. +Any data source that is loaded into cuSpatial via :func:`cuspatial.from_geopandas` +can then take advantage of `cudf`'s GPU-accelerated Arrow I/O routines. + +Read more about GeoArrow format in :ref:`GeoArrow Format`. 
-Read more about GeoArrow format in :func:`GeoArrowBuffers<cuspatial.GeoArrowBuffers>` - cuSpatial API Reference -~~~~~~~~~~~~~~~~~~~~~~~ +----------------------- .. toctree:: :maxdepth: 2 - :caption: Contents: - - api.rst -~~~~~~~~~~~~~~~~~~~~~~~ + api_docs/gis.rst + api_docs/spatial_indexing.rst + api_docs/trajectory.rst + api_docs/geopandas_compatibility.rst + api_docs/io.rst + api_docs/internals.rst + Indices and tables ================== * :ref:`genindex` -* :ref:`modindex` * :ref:`search` diff --git a/python/cuspatial/cuspatial/core/interpolate.py b/python/cuspatial/cuspatial/core/interpolate.py index ef191e490..cbdcc6a51 100644 --- a/python/cuspatial/cuspatial/core/interpolate.py +++ b/python/cuspatial/cuspatial/core/interpolate.py @@ -46,9 +46,11 @@ class CubicSpline: host based interpolation performance is likely to exceed GPU performance for a single curve. - However, cuSpatial massively outperforms scipy when many splines are fit - simultaneously. Data must be arranged in a SoA format, and the exclusive - `prefix_sum` of the separate curves must also be passed to the function.:: + However, cuSpatial significantly outperforms scipy when many splines are + fit simultaneously. Data must be arranged in a SoA format, and the + exclusive `prefix_sum` of the separate curves must also be passed to the + function.:: + NUM_SPLINES = 100000 SPLINE_LENGTH = 101 t = cudf.Series( diff --git a/python/cuspatial/cuspatial/geometry/geoarrowbuffers.py b/python/cuspatial/cuspatial/geometry/geoarrowbuffers.py index 6071c2d6a..7336a3c09 100644 --- a/python/cuspatial/cuspatial/geometry/geoarrowbuffers.py +++ b/python/cuspatial/cuspatial/geometry/geoarrowbuffers.py @@ -13,18 +13,8 @@ class GeoArrowBuffers: """A GPU GeoArrowBuffers object. - Parameters - ---------- - data : A dict or a GeoArrowBuffers object. - - The GeoArrow format specifies a tabular data format for geometry - information. 
Supported types include `Point`, `MultiPoint`, `LineString`, - `MultiLineString`, `Polygon`, and `MultiPolygon`. In order to store - these coordinate types in a strictly tabular fashion, columns are - created for Points, MultiPoints, LineStrings, and Polygons. - MultiLines and MultiPolygons are stored in the same data structure - as LineStrings and Polygons. GeoArrowBuffers are constructed from a dict - of host buffers with accepted keys: + GeoArrowBuffers are constructed from a dict of host buffers with + accepted keys: * points_xy * points_z @@ -45,78 +35,15 @@ class GeoArrowBuffers: Accepted host buffer object types include python list and any type that implements numpy's `__array__interface__` protocol. - GeoArrow Format - - GeoArrow format packs complex geometry types into 14 single-column Arrow - tables. This description is included for better understanding GeoArrow - format. Interacting with the GeoArrowBuffers is only required if you want - to convert cudf data to GeoPandas objects without starting from GeoPandas. - - The points geometry is the simplest: N points are stored in a length 2*N - buffer with interleaved x,y coordinates. An optional z buffer of length N - can be used. - - The multipoints geometry is the second simplest - identical to points, - with the addition of a multipoints_offsets buffer. The offsets buffer - stores N+1 indexes. The first multipoint is specified by 0, which is always - stored in offsets[0], and offsets[1], which is the length in points of - the first multipoint geometry. Subsequent multipoints are the prefix-sum of - the lengths of previous multipoints. 
- - Consider:: - - buffers = GeoArrowBuffers({ - "multipoints_xy": - [0, 0, 0, 1, 0, 2, 1, 0, 1, 1, 1, 2, 2, 0, 2, 1, 2, 2], - "multipoints_offsets": - [0, 6, 12, 18] - }) - - which encodes the following GeoPandas Series:: - - series = geopandas.Series([ - MultiPoint((0, 0), (0, 1), (0, 2)), - MultiPoint((1, 0), (1, 1), (1, 2)), - MultiPoint((2, 0), (2, 1), (2, 2)), - ]) - - LineString geometry is more complicated than multipoints because the - format allows for the use of LineStrings and MultiLineStrings in the same - buffer, via the mlines key:: - - buffers = GeoArrowBuffers({ - "lines_xy": - [0, 0, 0, 1, 0, 2, 1, 0, 1, 1, 1, 2, 2, 0, 2, 1, 2, 2, 3, 0, - 3, 1, 3, 2, 4, 0, 4, 1, 4, 2], - "lines_offsets": - [0, 6, 12, 18, 24, 30], - "mlines": - [1, 3] - }) - - Which encodes a GeoPandas Series:: - - series = geopandas.Series([ - LineString((0, 0), (0, 1), (0, 2)), - MultiLineString([(1, 0), (1, 1), (1, 2)], - [(2, 0), (2, 1), (2, 2)], - ) - LineString((3, 0), (3, 1), (3, 2)), - LineString((4, 0), (4, 1), (4, 2)), - ]) - - Polygon geometry includes `mpolygons` for MultiPolygons similar to the - LineString geometry. Polygons are encoded using the same format as - Shapefiles, with left-wound external rings and right-wound internal rings. - An exact example of `GeoArrowBuffers` to `geopandas.Series` is left to the - reader as an exercise. Convert any GeoPandas `Series` or `DataFrame` with - `cuspatial.from_geopandas(geopandas_object)`. - Notes ----- Legacy cuspatial algorithms depend on separated x and y columns. Access them with the `.x` and `.y` properties. + Parameters + ---------- + data : A dict or a GeoArrowBuffers object. + Examples -------- GeoArrowBuffers accept a dict as argument. 
Valid keys are in the bullet @@ -130,14 +57,14 @@ class GeoArrowBuffers: "multipoints_xy": [0, 0, 0, 1, 0, 2, 1, 0, 1, 1, 1, 2, 2, 0, 2, 1, 2, 2], "multipoints_offsets": - [0, 6, 12, 18] + [0, 6, 12, 18], "lines_xy": [0, 0, 0, 1, 0, 2, 1, 0, 1, 1, 1, 2, 2, 0, 2, 1, 2, 2, 3, 0, 3, 1, 3, 2, 4, 0, 4, 1, 4, 2], "lines_offsets": [0, 6, 12, 18, 24, 30], "mlines": - [1, 3] + [1, 3], "polygons_xy": [0, 0, 0, 1, 0, 2, 1, 0, 1, 1, 1, 2, 2, 0, 2, 1, 2, 2, 3, 0, 3, 1, 3, 2, 4, 0, 4, 1, 4, 2], @@ -247,11 +174,13 @@ def multipoints(self): @property def lines(self): """ - Contains the coordinates column, an offsets column, and a - mlines column. The mlines column is optional. The mlines column stores + Contains the coordinates column, an offsets column, + and a mlines column. + The mlines column is optional. The mlines column stores the indices of the offsets that indicate the beginning and end of each MultiLineString segment. The absence of an `mlines` column indicates - there are no `MultiLineStrings` in the data source, only `LineString`s. + there are no `MultiLineStrings` in the data source, + only `LineString` s. """ return self._lines @@ -313,6 +242,9 @@ def copy(self, deep=True): return result def to_host(self): + """ + Return a copy of GeoArrowBuffers backed by host data structures. 
+ """ return GeoArrowBuffers(self, data_locale=pd) def __repr__(self): diff --git a/python/cuspatial/cuspatial/geometry/geodataframe.py b/python/cuspatial/cuspatial/geometry/geodataframe.py index a450f8946..49dda32f7 100644 --- a/python/cuspatial/cuspatial/geometry/geodataframe.py +++ b/python/cuspatial/cuspatial/geometry/geodataframe.py @@ -22,7 +22,7 @@ def __init__(self, data: gpGeoDataFrame = None): Parameters ---------- - data : A geopandas.geodataframe.GeoDataFrame object + data : A geopandas.GeoDataFrame object """ super().__init__() if isinstance(data, gpGeoDataFrame): diff --git a/python/cuspatial/cuspatial/io/geopandas.py b/python/cuspatial/cuspatial/io/geopandas.py index 44875b2e2..48354d764 100644 --- a/python/cuspatial/cuspatial/io/geopandas.py +++ b/python/cuspatial/cuspatial/io/geopandas.py @@ -15,8 +15,8 @@ def from_geopandas(gpdf): Possible inputs: - geopandas.geoseries.GeoSeries - geopandas.geodataframe.GeoDataFrame + - :class:`geopandas.GeoSeries` + - :class:`geopandas.GeoDataFrame` """ if isinstance(gpdf, gpGeoSeries): return GeoSeries(gpdf)