Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python geometry array constructors #810

Merged
merged 3 commits into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/geoarrow-compute/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "maturin"
[project]
name = "geoarrow-rust-compute"
requires-python = ">=3.8"
dependencies = ["arro3-core", "geoarrow-rust-core", "pyproj"]
dependencies = ["arro3-core>=0.4", "geoarrow-rust-core", "pyproj"]
classifiers = [
"Programming Language :: Rust",
"Programming Language :: Python :: Implementation :: CPython",
Expand Down
2 changes: 1 addition & 1 deletion python/geoarrow-core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "maturin"
[project]
name = "geoarrow-rust-core"
requires-python = ">=3.8"
dependencies = ["arro3-core", "pyproj"]
dependencies = ["arro3-core>=0.4", "pyproj"]
classifiers = [
"Programming Language :: Rust",
"Programming Language :: Python :: Implementation :: CPython",
Expand Down
32 changes: 32 additions & 0 deletions python/geoarrow-core/python/geoarrow/rust/core/_constructors.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from typing import List, Tuple

from arro3.core.types import ArrayInput

from ._rust import NativeArray

def points(
    coords: ArrayInput | Tuple[ArrayInput, ...] | List[ArrayInput],
) -> NativeArray:
    """Create a point array from a buffer of coordinates.

    Args:
        coords: Either a single fixed-size-list array whose inner values are
            float64 and whose list size is 2 or 3 (interleaved coordinates), or
            a tuple or list of 2 or 3 float64 arrays, one per dimension
            (separated coordinates). Null values are rejected.

    Returns:
        A native geometry array holding the points. The array is 2D or 3D
        depending on the number of dimensions found in ``coords``.
    """
    ...
def linestrings(
    coords: ArrayInput | Tuple[ArrayInput, ...] | List[ArrayInput],
    geom_offsets: ArrayInput,
) -> NativeArray:
    """Create a line string array from coordinates and geometry offsets.

    Args:
        coords: Either a single fixed-size-list array whose inner values are
            float64 and whose list size is 2 or 3 (interleaved coordinates), or
            a tuple or list of 2 or 3 float64 arrays, one per dimension
            (separated coordinates). Null values are rejected.
        geom_offsets: Offsets forwarded as the geometry offsets of the line
            string array. NOTE(review): the accepted offset array types are
            decided by the Rust ``PyOffsetBuffer`` extractor -- confirm there.

    Returns:
        A native geometry array holding the line strings.
    """
    ...
def polygons(
    coords: ArrayInput | Tuple[ArrayInput, ...] | List[ArrayInput],
    geom_offsets: ArrayInput,
    ring_offsets: ArrayInput,
) -> NativeArray:
    """Create a polygon array from coordinates, geometry offsets and ring offsets.

    Args:
        coords: Either a single fixed-size-list array whose inner values are
            float64 and whose list size is 2 or 3 (interleaved coordinates), or
            a tuple or list of 2 or 3 float64 arrays, one per dimension
            (separated coordinates). Null values are rejected.
        geom_offsets: Offsets forwarded as the geometry offsets of the polygon
            array.
        ring_offsets: Offsets forwarded as the ring offsets of the polygon
            array. NOTE(review): the accepted offset array types are decided by
            the Rust ``PyOffsetBuffer`` extractor -- confirm there.

    Returns:
        A native geometry array holding the polygons.
    """
    ...
def multipoints(
    coords: ArrayInput | Tuple[ArrayInput, ...] | List[ArrayInput],
    geom_offsets: ArrayInput,
) -> NativeArray:
    """Create a multi point array from coordinates and geometry offsets.

    The Rust implementation requires ``geom_offsets``; the stub previously
    omitted it, so type checkers accepted calls that fail at runtime.

    Args:
        coords: Either a single fixed-size-list array whose inner values are
            float64 and whose list size is 2 or 3 (interleaved coordinates), or
            a tuple or list of 2 or 3 float64 arrays, one per dimension
            (separated coordinates). Null values are rejected.
        geom_offsets: Offsets forwarded as the geometry offsets of the multi
            point array. NOTE(review): the accepted offset array types are
            decided by the Rust ``PyOffsetBuffer`` extractor -- confirm there.

    Returns:
        A native geometry array holding the multi points.
    """
    ...
def multilinestrings(
    coords: ArrayInput | Tuple[ArrayInput, ...] | List[ArrayInput],
    geom_offsets: ArrayInput,
    ring_offsets: ArrayInput,
) -> NativeArray:
    """Create a multi line string array from coordinates and two levels of offsets.

    Args:
        coords: Either a single fixed-size-list array whose inner values are
            float64 and whose list size is 2 or 3 (interleaved coordinates), or
            a tuple or list of 2 or 3 float64 arrays, one per dimension
            (separated coordinates). Null values are rejected.
        geom_offsets: First offsets argument forwarded to the multi line string
            array constructor.
        ring_offsets: Second offsets argument forwarded to the multi line
            string array constructor. NOTE(review): the accepted offset array
            types are decided by the Rust ``PyOffsetBuffer`` extractor --
            confirm there.

    Returns:
        A native geometry array holding the multi line strings.
    """
    ...
def multipolygons(
    coords: ArrayInput | Tuple[ArrayInput, ...] | List[ArrayInput],
    geom_offsets: ArrayInput,
    polygon_offsets: ArrayInput,
    ring_offsets: ArrayInput,
) -> NativeArray:
    """Create a multi polygon array from coordinates and three levels of offsets.

    Args:
        coords: Either a single fixed-size-list array whose inner values are
            float64 and whose list size is 2 or 3 (interleaved coordinates), or
            a tuple or list of 2 or 3 float64 arrays, one per dimension
            (separated coordinates). Null values are rejected.
        geom_offsets: First offsets argument forwarded to the multi polygon
            array constructor.
        polygon_offsets: Second offsets argument forwarded to the multi polygon
            array constructor.
        ring_offsets: Third offsets argument forwarded to the multi polygon
            array constructor. NOTE(review): the accepted offset array types
            are decided by the Rust ``PyOffsetBuffer`` extractor -- confirm
            there.

    Returns:
        A native geometry array holding the multi polygons.
    """
    ...
7 changes: 7 additions & 0 deletions python/geoarrow-core/python/geoarrow/rust/core/_rust.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@ except ImportError:
from .enums import CoordType, Dimension
from .types import CoordTypeT, DimensionT

from geoarrow.rust.core._constructors import points as points
from geoarrow.rust.core._constructors import linestrings as linestrings
from geoarrow.rust.core._constructors import polygons as polygons
from geoarrow.rust.core._constructors import multipoints as multipoints
from geoarrow.rust.core._constructors import multilinestrings as multilinestrings
from geoarrow.rust.core._constructors import multipolygons as multipolygons

class Geometry:
"""
An immutable geometry scalar using GeoArrow's in-memory representation.
Expand Down
153 changes: 153 additions & 0 deletions python/geoarrow-core/src/constructors.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
use std::sync::Arc;

use geoarrow::array::{
LineStringArray, MultiLineStringArray, MultiPointArray, MultiPolygonArray, NativeArrayDyn,
PointArray, PolygonArray,
};
use pyo3::prelude::*;
use pyo3_geoarrow::{PyCoordBuffer, PyGeoArrowResult, PyNativeArray, PyOffsetBuffer};

/// Build a point array from a coordinate buffer.
///
/// The dimension of the resulting array follows the variant of `coords`. No
/// validity bitmap is attached and the array metadata is left at its default.
#[pyfunction]
pub fn points(coords: PyCoordBuffer) -> PyGeoArrowResult<PyNativeArray> {
    // Each arm builds a differently-typed array (2D vs 3D), so the type
    // erasure into `NativeArrayDyn` has to happen inside the arm.
    let native = match coords {
        PyCoordBuffer::TwoD(xy) => {
            NativeArrayDyn::new(Arc::new(PointArray::new(xy, None, Default::default())))
        }
        PyCoordBuffer::ThreeD(xyz) => {
            NativeArrayDyn::new(Arc::new(PointArray::new(xyz, None, Default::default())))
        }
    };
    Ok(PyNativeArray::new(native))
}

/// Build a line string array from a coordinate buffer and geometry offsets.
///
/// The dimension of the resulting array follows the variant of `coords`. No
/// validity bitmap is attached and the array metadata is left at its default.
///
/// # Errors
///
/// Returns an error (raised as a Python exception) when the underlying
/// `LineStringArray::try_new` rejects the combination of coordinates and
/// offsets, instead of panicking on invalid user input.
#[pyfunction]
pub fn linestrings(
    coords: PyCoordBuffer,
    geom_offsets: PyOffsetBuffer,
) -> PyGeoArrowResult<PyNativeArray> {
    let geom_offsets = geom_offsets.into_inner();
    // Each arm builds a differently-typed array (2D vs 3D), so the type
    // erasure into `NativeArrayDyn` has to happen inside the arm.
    let native = match coords {
        PyCoordBuffer::TwoD(xy) => NativeArrayDyn::new(Arc::new(LineStringArray::try_new(
            xy,
            geom_offsets,
            None,
            Default::default(),
        )?)),
        PyCoordBuffer::ThreeD(xyz) => NativeArrayDyn::new(Arc::new(LineStringArray::try_new(
            xyz,
            geom_offsets,
            None,
            Default::default(),
        )?)),
    };
    Ok(PyNativeArray::new(native))
}

/// Build a polygon array from a coordinate buffer, geometry offsets and ring
/// offsets.
///
/// The dimension of the resulting array follows the variant of `coords`. No
/// validity bitmap is attached and the array metadata is left at its default.
///
/// # Errors
///
/// Returns an error (raised as a Python exception) when the underlying
/// `PolygonArray::try_new` rejects the combination of coordinates and offsets,
/// instead of panicking on invalid user input.
#[pyfunction]
pub fn polygons(
    coords: PyCoordBuffer,
    geom_offsets: PyOffsetBuffer,
    ring_offsets: PyOffsetBuffer,
) -> PyGeoArrowResult<PyNativeArray> {
    let geom_offsets = geom_offsets.into_inner();
    let ring_offsets = ring_offsets.into_inner();
    // Each arm builds a differently-typed array (2D vs 3D), so the type
    // erasure into `NativeArrayDyn` has to happen inside the arm.
    let native = match coords {
        PyCoordBuffer::TwoD(xy) => NativeArrayDyn::new(Arc::new(PolygonArray::try_new(
            xy,
            geom_offsets,
            ring_offsets,
            None,
            Default::default(),
        )?)),
        PyCoordBuffer::ThreeD(xyz) => NativeArrayDyn::new(Arc::new(PolygonArray::try_new(
            xyz,
            geom_offsets,
            ring_offsets,
            None,
            Default::default(),
        )?)),
    };
    Ok(PyNativeArray::new(native))
}

/// Build a multi point array from a coordinate buffer and geometry offsets.
///
/// The dimension of the resulting array follows the variant of `coords`. No
/// validity bitmap is attached and the array metadata is left at its default.
///
/// # Errors
///
/// Returns an error (raised as a Python exception) when the underlying
/// `MultiPointArray::try_new` rejects the combination of coordinates and
/// offsets, instead of panicking on invalid user input.
#[pyfunction]
pub fn multipoints(
    coords: PyCoordBuffer,
    geom_offsets: PyOffsetBuffer,
) -> PyGeoArrowResult<PyNativeArray> {
    let geom_offsets = geom_offsets.into_inner();
    // Each arm builds a differently-typed array (2D vs 3D), so the type
    // erasure into `NativeArrayDyn` has to happen inside the arm.
    let native = match coords {
        PyCoordBuffer::TwoD(xy) => NativeArrayDyn::new(Arc::new(MultiPointArray::try_new(
            xy,
            geom_offsets,
            None,
            Default::default(),
        )?)),
        PyCoordBuffer::ThreeD(xyz) => NativeArrayDyn::new(Arc::new(MultiPointArray::try_new(
            xyz,
            geom_offsets,
            None,
            Default::default(),
        )?)),
    };
    Ok(PyNativeArray::new(native))
}

/// Build a multi line string array from a coordinate buffer and two levels of
/// offsets.
///
/// `geom_offsets` and `ring_offsets` are forwarded, in that order, as the two
/// offset arguments of the `MultiLineStringArray` constructor. The dimension
/// of the resulting array follows the variant of `coords`. No validity bitmap
/// is attached and the array metadata is left at its default.
///
/// # Errors
///
/// Returns an error (raised as a Python exception) when the underlying
/// `MultiLineStringArray::try_new` rejects the combination of coordinates and
/// offsets, instead of panicking on invalid user input.
#[pyfunction]
pub fn multilinestrings(
    coords: PyCoordBuffer,
    geom_offsets: PyOffsetBuffer,
    ring_offsets: PyOffsetBuffer,
) -> PyGeoArrowResult<PyNativeArray> {
    let geom_offsets = geom_offsets.into_inner();
    let ring_offsets = ring_offsets.into_inner();
    // Each arm builds a differently-typed array (2D vs 3D), so the type
    // erasure into `NativeArrayDyn` has to happen inside the arm.
    let native = match coords {
        PyCoordBuffer::TwoD(xy) => NativeArrayDyn::new(Arc::new(MultiLineStringArray::try_new(
            xy,
            geom_offsets,
            ring_offsets,
            None,
            Default::default(),
        )?)),
        PyCoordBuffer::ThreeD(xyz) => {
            NativeArrayDyn::new(Arc::new(MultiLineStringArray::try_new(
                xyz,
                geom_offsets,
                ring_offsets,
                None,
                Default::default(),
            )?))
        }
    };
    Ok(PyNativeArray::new(native))
}

/// Build a multi polygon array from a coordinate buffer and three levels of
/// offsets.
///
/// `geom_offsets`, `polygon_offsets` and `ring_offsets` are forwarded, in that
/// order, as the three offset arguments of the `MultiPolygonArray`
/// constructor. The dimension of the resulting array follows the variant of
/// `coords`. No validity bitmap is attached and the array metadata is left at
/// its default.
///
/// # Errors
///
/// Returns an error (raised as a Python exception) when the underlying
/// `MultiPolygonArray::try_new` rejects the combination of coordinates and
/// offsets, instead of panicking on invalid user input.
#[pyfunction]
pub fn multipolygons(
    coords: PyCoordBuffer,
    geom_offsets: PyOffsetBuffer,
    polygon_offsets: PyOffsetBuffer,
    ring_offsets: PyOffsetBuffer,
) -> PyGeoArrowResult<PyNativeArray> {
    let geom_offsets = geom_offsets.into_inner();
    let polygon_offsets = polygon_offsets.into_inner();
    let ring_offsets = ring_offsets.into_inner();
    // Each arm builds a differently-typed array (2D vs 3D), so the type
    // erasure into `NativeArrayDyn` has to happen inside the arm.
    let native = match coords {
        PyCoordBuffer::TwoD(xy) => NativeArrayDyn::new(Arc::new(MultiPolygonArray::try_new(
            xy,
            geom_offsets,
            polygon_offsets,
            ring_offsets,
            None,
            Default::default(),
        )?)),
        PyCoordBuffer::ThreeD(xyz) => NativeArrayDyn::new(Arc::new(MultiPolygonArray::try_new(
            xyz,
            geom_offsets,
            polygon_offsets,
            ring_offsets,
            None,
            Default::default(),
        )?)),
    };
    Ok(PyNativeArray::new(native))
}
11 changes: 10 additions & 1 deletion python/geoarrow-core/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use pyo3::prelude::*;
mod constructors;
pub(crate) mod crs;
pub mod ffi;
pub mod interop;
// pub mod scalar;
pub mod table;

const VERSION: &str = env!("CARGO_PKG_VERSION");
Expand All @@ -25,6 +25,15 @@ fn _rust(_py: Python, m: &Bound<PyModule>) -> PyResult<()> {
m.add_class::<pyo3_geoarrow::PySerializedArray>()?;
m.add_class::<pyo3_geoarrow::PySerializedType>()?;

// Constructors

m.add_function(wrap_pyfunction!(crate::constructors::points, m)?)?;
m.add_function(wrap_pyfunction!(crate::constructors::linestrings, m)?)?;
m.add_function(wrap_pyfunction!(crate::constructors::polygons, m)?)?;
m.add_function(wrap_pyfunction!(crate::constructors::multipoints, m)?)?;
m.add_function(wrap_pyfunction!(crate::constructors::multilinestrings, m)?)?;
m.add_function(wrap_pyfunction!(crate::constructors::multipolygons, m)?)?;

// Top-level table functions

m.add_function(wrap_pyfunction!(crate::table::geometry_col, m)?)?;
Expand Down
2 changes: 1 addition & 1 deletion python/geoarrow-io/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "maturin"
[project]
name = "geoarrow-rust-io"
requires-python = ">=3.8"
dependencies = ["arro3-core", "pyproj"]
dependencies = ["arro3-core>=0.4", "pyproj"]
classifiers = [
"Programming Language :: Rust",
"Programming Language :: Python :: Implementation :: CPython",
Expand Down
145 changes: 145 additions & 0 deletions python/pyo3-geoarrow/src/coord_buffer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
use arrow::array::AsArray;
use arrow::datatypes::Float64Type;
use arrow_array::Array;
use arrow_schema::DataType;
use geoarrow::array::{CoordBuffer, InterleavedCoordBuffer, SeparatedCoordBuffer};
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3::types::{PyList, PyTuple};
use pyo3_arrow::PyArray;

/// A coordinate buffer extracted from a Python object, tagged by its number
/// of dimensions.
///
/// The `FromPyObject` implementation builds this from either a fixed-size-list
/// array (interleaved coordinates) or a tuple/list of per-dimension arrays
/// (separated coordinates).
pub enum PyCoordBuffer {
    /// Coordinates with two values per position.
    TwoD(CoordBuffer<2>),
    /// Coordinates with three values per position.
    ThreeD(CoordBuffer<3>),
}

impl<'py> FromPyObject<'py> for PyCoordBuffer {
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
if ob.is_instance_of::<PyTuple>() || ob.is_instance_of::<PyList>() {
let arrays = ob.extract::<Vec<PyArray>>()?;

if arrays.len() < 2 || arrays.len() > 3 {
return Err(PyValueError::new_err(format!(
"Expected 2 or 3 arrays for each dimension, got {}.",
arrays.len()
)));
}

let x = arrays[0].array();
let y = arrays[1].array();

if !matches!(x.data_type(), DataType::Float64) {
return Err(PyValueError::new_err(format!(
"Expected x to be float64 data type, got {}",
x.data_type()
)));
}

if !matches!(y.data_type(), DataType::Float64) {
return Err(PyValueError::new_err(format!(
"Expected y to be float64 data type, got {}",
y.data_type()
)));
}

let x = x.as_primitive::<Float64Type>();
let y = y.as_primitive::<Float64Type>();

if x.null_count() != 0 {
return Err(PyValueError::new_err(format!(
"Cannot construct point array with null values. The 'x' array has {} null values",
x.null_count()
)));
}

if y.null_count() != 0 {
return Err(PyValueError::new_err(format!(
"Cannot construct point array with null values. The 'y' array has {} null values",
y.null_count()
)));
}

let x = x.values();
let y = y.values();

if let Some(z) = arrays.get(2) {
if !matches!(z.field().data_type(), DataType::Float64) {
return Err(PyValueError::new_err(format!(
"Expected z to be float64 data type, got {}",
z.field().data_type()
)));
}

let z = z.array().as_primitive::<Float64Type>();

if z.null_count() != 0 {
return Err(PyValueError::new_err(format!(
"Cannot construct point array with null values. The 'z' array has {} null values",
z.null_count()
)));
}

let z = z.values();

Ok(Self::ThreeD(
SeparatedCoordBuffer::new([x.clone(), y.clone(), z.clone()]).into(),
))
} else {
Ok(Self::TwoD(
SeparatedCoordBuffer::new([x.clone(), y.clone()]).into(),
))
}
} else {
let coords = ob.extract::<PyArray>()?;

match coords.field().data_type() {
DataType::FixedSizeList(inner_field, list_size) => {
if !matches!(inner_field.data_type(), DataType::Float64) {
return Err(PyValueError::new_err(format!(
"Expected the inner field of coords to be float64 data type, got {}",
inner_field.data_type()
)));
}

let coords = coords.as_ref().as_fixed_size_list();

if coords.null_count() != 0 {
return Err(PyValueError::new_err(format!(
"Cannot have null values in coordinate fixed size list array. {} null values present.",
coords.null_count()
))
);
}

let values = coords.values();
let values = values.as_primitive::<Float64Type>();

if values.null_count() != 0 {
return Err(PyValueError::new_err(format!(
"Cannot construct point array with null values in the inner buffer of the coordinate array. The values of the fixed size list array array has {} null values",
values.null_count()
))
);
}

match list_size {
2 => Ok(Self::TwoD(
InterleavedCoordBuffer::<2>::new(values.values().clone()).into(),
)),
3 => Ok(Self::ThreeD(
InterleavedCoordBuffer::<3>::new(values.values().clone()).into(),
)),
_ => Err(PyValueError::new_err(format!(
"Unsupported fixed size list size {}",
list_size
))),
}
}
dt => Err(PyValueError::new_err(format!(
"Expected coords to be FixedSizeList data type, got {}",
dt
))),
}
}
}
}
Loading
Loading