From 856c0b3195aa2475e296c437a7c360672f738b59 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Sun, 6 Oct 2024 23:55:56 -0400 Subject: [PATCH 1/3] Python constructors --- python/geoarrow-core/src/constructors.rs | 169 ++++++++++++++++++++++ python/geoarrow-core/src/lib.rs | 11 +- python/pyo3-geoarrow/src/coord_buffer.rs | 149 +++++++++++++++++++ python/pyo3-geoarrow/src/lib.rs | 4 + python/pyo3-geoarrow/src/offset_buffer.rs | 33 +++++ 5 files changed, 365 insertions(+), 1 deletion(-) create mode 100644 python/geoarrow-core/src/constructors.rs create mode 100644 python/pyo3-geoarrow/src/coord_buffer.rs create mode 100644 python/pyo3-geoarrow/src/offset_buffer.rs diff --git a/python/geoarrow-core/src/constructors.rs b/python/geoarrow-core/src/constructors.rs new file mode 100644 index 00000000..df51f89f --- /dev/null +++ b/python/geoarrow-core/src/constructors.rs @@ -0,0 +1,169 @@ +use std::sync::Arc; + +use geoarrow::array::{ + LineStringArray, MultiLineStringArray, MultiPointArray, MultiPolygonArray, NativeArrayDyn, + PointArray, PolygonArray, +}; +use pyo3::prelude::*; +use pyo3_geoarrow::{PyCoordBuffer, PyGeoArrowResult, PyNativeArray, PyOffsetBuffer}; + +#[pyfunction] +pub fn points(coords: PyCoordBuffer) -> PyGeoArrowResult { + match coords { + PyCoordBuffer::TwoD(coords) => { + let array = PointArray::new(coords.into(), None, Default::default()); + Ok(PyNativeArray::new(NativeArrayDyn::new(Arc::new(array)))) + } + PyCoordBuffer::ThreeD(coords) => { + let array = PointArray::new(coords.into(), None, Default::default()); + Ok(PyNativeArray::new(NativeArrayDyn::new(Arc::new(array)))) + } + } +} + +#[pyfunction] +pub fn linestrings( + coords: PyCoordBuffer, + geom_offsets: PyOffsetBuffer, +) -> PyGeoArrowResult { + match coords { + PyCoordBuffer::TwoD(coords) => { + let array = LineStringArray::new( + coords.into(), + geom_offsets.into_inner(), + None, + Default::default(), + ); + Ok(PyNativeArray::new(NativeArrayDyn::new(Arc::new(array)))) + } + PyCoordBuffer::ThreeD(coords) => { + let array = LineStringArray::new( + coords.into(), + geom_offsets.into_inner(), + None, + Default::default(), + ); + Ok(PyNativeArray::new(NativeArrayDyn::new(Arc::new(array)))) + } + } +} + +#[pyfunction] +pub fn polygons( + coords: PyCoordBuffer, + geom_offsets: PyOffsetBuffer, + ring_offsets: PyOffsetBuffer, +) -> PyGeoArrowResult { + match coords { + PyCoordBuffer::TwoD(coords) => { + let array = PolygonArray::new( + coords.into(), + geom_offsets.into_inner(), + ring_offsets.into_inner(), + None, + Default::default(), + ); + Ok(PyNativeArray::new(NativeArrayDyn::new(Arc::new(array)))) + } + PyCoordBuffer::ThreeD(coords) => { + let array = PolygonArray::new( + coords.into(), + geom_offsets.into_inner(), + ring_offsets.into_inner(), + None, + Default::default(), + ); + Ok(PyNativeArray::new(NativeArrayDyn::new(Arc::new(array)))) + } + } +} + +#[pyfunction] +pub fn multipoints( + coords: PyCoordBuffer, + geom_offsets: PyOffsetBuffer, +) -> PyGeoArrowResult { + match coords { + PyCoordBuffer::TwoD(coords) => { + let array = MultiPointArray::new( + coords.into(), + geom_offsets.into_inner(), + None, + Default::default(), + ); + Ok(PyNativeArray::new(NativeArrayDyn::new(Arc::new(array)))) + } + PyCoordBuffer::ThreeD(coords) => { + let array = MultiPointArray::new( + coords.into(), + geom_offsets.into_inner(), + None, + Default::default(), + ); + Ok(PyNativeArray::new(NativeArrayDyn::new(Arc::new(array)))) + } + } +} + +#[pyfunction] +pub fn multilinestrings( + coords: PyCoordBuffer, + geom_offsets: PyOffsetBuffer, + ring_offsets: PyOffsetBuffer, +) -> PyGeoArrowResult { + match coords { + PyCoordBuffer::TwoD(coords) => { + let array = MultiLineStringArray::new( + coords.into(), + geom_offsets.into_inner(), + ring_offsets.into_inner(), + None, + Default::default(), + ); + Ok(PyNativeArray::new(NativeArrayDyn::new(Arc::new(array)))) + } + PyCoordBuffer::ThreeD(coords) => { + let array = MultiLineStringArray::new( + coords.into(), + geom_offsets.into_inner(), + ring_offsets.into_inner(), + None, + Default::default(), + ); + Ok(PyNativeArray::new(NativeArrayDyn::new(Arc::new(array)))) + } + } +} + +#[pyfunction] +pub fn multipolygons( + coords: PyCoordBuffer, + geom_offsets: PyOffsetBuffer, + polygon_offsets: PyOffsetBuffer, + ring_offsets: PyOffsetBuffer, +) -> PyGeoArrowResult { + match coords { + PyCoordBuffer::TwoD(coords) => { + let array = MultiPolygonArray::new( + coords.into(), + geom_offsets.into_inner(), + polygon_offsets.into_inner(), + ring_offsets.into_inner(), + None, + Default::default(), + ); + Ok(PyNativeArray::new(NativeArrayDyn::new(Arc::new(array)))) + } + PyCoordBuffer::ThreeD(coords) => { + let array = MultiPolygonArray::new( + coords.into(), + geom_offsets.into_inner(), + polygon_offsets.into_inner(), + ring_offsets.into_inner(), + None, + Default::default(), + ); + Ok(PyNativeArray::new(NativeArrayDyn::new(Arc::new(array)))) + } + } +} diff --git a/python/geoarrow-core/src/lib.rs b/python/geoarrow-core/src/lib.rs index 6308dcfc..651e62cc 100644 --- a/python/geoarrow-core/src/lib.rs +++ b/python/geoarrow-core/src/lib.rs @@ -1,8 +1,8 @@ use pyo3::prelude::*; +mod constructors; pub(crate) mod crs; pub mod ffi; pub mod interop; -// pub mod scalar; pub mod table; const VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -25,6 +25,15 @@ fn _rust(_py: Python, m: &Bound) -> PyResult<()> { m.add_class::()?; m.add_class::()?; + // Constructors + + m.add_function(wrap_pyfunction!(crate::constructors::points, m)?)?; + m.add_function(wrap_pyfunction!(crate::constructors::linestrings, m)?)?; + m.add_function(wrap_pyfunction!(crate::constructors::polygons, m)?)?; + m.add_function(wrap_pyfunction!(crate::constructors::multipoints, m)?)?; + m.add_function(wrap_pyfunction!(crate::constructors::multilinestrings, m)?)?; + m.add_function(wrap_pyfunction!(crate::constructors::multipolygons, m)?)?; + // Top-level table functions m.add_function(wrap_pyfunction!(crate::table::geometry_col, m)?)?; diff --git a/python/pyo3-geoarrow/src/coord_buffer.rs b/python/pyo3-geoarrow/src/coord_buffer.rs new file mode 100644 index 00000000..f7b57d14 --- /dev/null +++ b/python/pyo3-geoarrow/src/coord_buffer.rs @@ -0,0 +1,149 @@ +use arrow::array::AsArray; +use arrow::datatypes::Float64Type; +use arrow_array::Array; +use arrow_schema::DataType; +use geoarrow::array::{CoordBuffer, InterleavedCoordBuffer, SeparatedCoordBuffer}; +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use pyo3_arrow::PyArray; + +pub enum PyCoordBuffer { + TwoD(CoordBuffer<2>), + ThreeD(CoordBuffer<3>), +} + +impl<'py> FromPyObject<'py> for PyCoordBuffer { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { + if let Ok(arrays) = ob.extract::>() { + // Separated arrays + if arrays.len() < 2 || arrays.len() > 3 { + return Err(PyValueError::new_err(format!( + "Expected 2 or 3 arrays for each dimension, got {}.", + arrays.len() + ))); + } + + let x = arrays[0].array(); + let y = arrays[1].array(); + + if !matches!(x.data_type(), DataType::Float64) { + return Err(PyValueError::new_err(format!( + "Expected x to be float64 data type, got {}", + x.data_type() + ))); + } + + if !matches!(y.data_type(), DataType::Float64) { + return Err(PyValueError::new_err(format!( + "Expected y to be float64 data type, got {}", + y.data_type() + ))); + } + + let x = x.as_primitive::(); + let y = y.as_primitive::(); + + if x.null_count() != 0 { + return Err(PyValueError::new_err(format!( + "Cannot construct point array with null values. The 'x' array has {} null values", + x.null_count() + ))); + } + + if y.null_count() != 0 { + return Err(PyValueError::new_err(format!( + "Cannot construct point array with null values. The 'y' array has {} null values", + y.null_count() + ))); + } + + let x = x.values(); + let y = y.values(); + + if let Some(z) = arrays.get(2) { + if !matches!(z.field().data_type(), DataType::Float64) { + return Err(PyValueError::new_err(format!( + "Expected z to be float64 data type, got {}", + z.field().data_type() + ))); + } + + let z = z.array().as_primitive::(); + + if z.null_count() != 0 { + return Err(PyValueError::new_err(format!( + "Cannot construct point array with null values. The 'z' array has {} null values", + z.null_count() + ))); + } + + let z = z.values(); + + Ok(Self::ThreeD( + SeparatedCoordBuffer::new([x.clone(), y.clone(), z.clone()]).into(), + )) + } else { + Ok(Self::TwoD( + SeparatedCoordBuffer::new([x.clone(), y.clone()]).into(), + )) + } + } else if let Ok(coords) = ob.extract::() { + match coords.field().data_type() { + DataType::FixedSizeList(inner_field, list_size) => { + if !matches!(inner_field.data_type(), DataType::Float64) { + return Err(PyValueError::new_err(format!( + "Expected the inner field of coords to be float64 data type, got {}", + inner_field.data_type() + ))); + } + + let coords = coords.as_ref().as_fixed_size_list(); + + if coords.null_count() != 0 { + return Err(PyValueError::new_err(format!( + "Cannot have null values in coordinate fixed size list array. {} null values present.", + coords.null_count() + )) + ); + } + + let values = coords.values(); + let values = values.as_primitive::(); + + if values.null_count() != 0 { + return Err(PyValueError::new_err(format!( + "Cannot construct point array with null values in the inner buffer of the coordinate array. The values of the fixed size list array array has {} null values", + values.null_count() + )) + ); + } + + match list_size { + 2 => Ok(Self::TwoD( + InterleavedCoordBuffer::<2>::new(values.values().clone()).into(), + )), + 3 => Ok(Self::ThreeD( + InterleavedCoordBuffer::<3>::new(values.values().clone()).into(), + )), + _ => { + return Err(PyValueError::new_err(format!( + "Unsupported fixed size list size {}", + list_size + ))); + } + } + } + dt => { + return Err(PyValueError::new_err(format!( + "Expected coords to be FixedSizeList data type, got {}", + dt + ))); + } + } + } else { + Err(PyValueError::new_err( + "Expected array-like or iterable of array-like for coordinate buffer.", + )) + } + } +} diff --git a/python/pyo3-geoarrow/src/lib.rs b/python/pyo3-geoarrow/src/lib.rs index 556b02df..55543932 100644 --- a/python/pyo3-geoarrow/src/lib.rs +++ b/python/pyo3-geoarrow/src/lib.rs @@ -1,16 +1,20 @@ mod array; mod chunked_array; +mod coord_buffer; mod coord_type; mod data_type; mod dimension; mod error; mod ffi; +mod offset_buffer; mod scalar; pub use array::{PyNativeArray, PySerializedArray}; pub use chunked_array::PyChunkedNativeArray; +pub use coord_buffer::PyCoordBuffer; pub use coord_type::PyCoordType; pub use data_type::{PyNativeType, PySerializedType}; pub use dimension::PyDimension; pub use error::{PyGeoArrowError, PyGeoArrowResult}; +pub use offset_buffer::PyOffsetBuffer; pub use scalar::PyGeometry; diff --git a/python/pyo3-geoarrow/src/offset_buffer.rs b/python/pyo3-geoarrow/src/offset_buffer.rs new file mode 100644 index 00000000..8710a976 --- /dev/null +++ b/python/pyo3-geoarrow/src/offset_buffer.rs @@ -0,0 +1,33 @@ +use arrow::array::AsArray; +use arrow::compute::cast; +use arrow::datatypes::Int32Type; +use arrow_buffer::OffsetBuffer; +use arrow_schema::DataType; +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use pyo3_arrow::PyArray; + +use crate::PyGeoArrowError; + +pub struct PyOffsetBuffer(OffsetBuffer); + +impl PyOffsetBuffer { + pub fn into_inner(self) -> OffsetBuffer { + self.0 + } +} + +impl<'py> FromPyObject<'py> for PyOffsetBuffer { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { + let ob = ob.extract::()?; + if ob.array().null_count() != 0 { + return Err(PyValueError::new_err(format!( + "Cannot construct offset buffer with nulls. Got {} nulls.", + ob.array().null_count() + ))); + } + let offsets = cast(ob.as_ref(), &DataType::Int32).map_err(PyGeoArrowError::from)?; + let offsets = offsets.as_ref().as_primitive::(); + Ok(Self(OffsetBuffer::new(offsets.values().clone()))) + } +} From 64b71784847098caf9c735eba5ac5b191d76e592 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Sun, 6 Oct 2024 23:57:19 -0400 Subject: [PATCH 2/3] lint --- python/geoarrow-core/src/constructors.rs | 48 ++++++++---------------- 1 file changed, 16 insertions(+), 32 deletions(-) diff --git a/python/geoarrow-core/src/constructors.rs b/python/geoarrow-core/src/constructors.rs index df51f89f..85dc9a5e 100644 --- a/python/geoarrow-core/src/constructors.rs +++ b/python/geoarrow-core/src/constructors.rs @@ -11,11 +11,11 @@ use pyo3_geoarrow::{PyCoordBuffer, PyGeoArrowResult, PyNativeArray, PyOffsetBuff pub fn points(coords: PyCoordBuffer) -> PyGeoArrowResult { match coords { PyCoordBuffer::TwoD(coords) => { - let array = PointArray::new(coords.into(), None, Default::default()); + let array = PointArray::new(coords, None, Default::default()); Ok(PyNativeArray::new(NativeArrayDyn::new(Arc::new(array)))) } PyCoordBuffer::ThreeD(coords) => { - let array = PointArray::new(coords.into(), None, Default::default()); + let array = PointArray::new(coords, None, Default::default()); Ok(PyNativeArray::new(NativeArrayDyn::new(Arc::new(array)))) } } @@ -28,21 +28,13 @@ pub fn linestrings( ) -> PyGeoArrowResult { match coords { PyCoordBuffer::TwoD(coords) => { - let array = LineStringArray::new( - coords.into(), - geom_offsets.into_inner(), - None, - Default::default(), - ); + let array = + LineStringArray::new(coords, geom_offsets.into_inner(), None, Default::default()); Ok(PyNativeArray::new(NativeArrayDyn::new(Arc::new(array)))) } PyCoordBuffer::ThreeD(coords) => { - let array = LineStringArray::new( - coords.into(), - geom_offsets.into_inner(), - None, - Default::default(), - ); + let array = + LineStringArray::new(coords, geom_offsets.into_inner(), None, Default::default()); Ok(PyNativeArray::new(NativeArrayDyn::new(Arc::new(array)))) } } @@ -57,7 +49,7 @@ pub fn polygons( match coords { PyCoordBuffer::TwoD(coords) => { let array = PolygonArray::new( - coords.into(), + coords, geom_offsets.into_inner(), ring_offsets.into_inner(), None, @@ -67,7 +59,7 @@ pub fn polygons( } PyCoordBuffer::ThreeD(coords) => { let array = PolygonArray::new( - coords.into(), + coords, geom_offsets.into_inner(), ring_offsets.into_inner(), None, @@ -85,21 +77,13 @@ pub fn multipoints( ) -> PyGeoArrowResult { match coords { PyCoordBuffer::TwoD(coords) => { - let array = MultiPointArray::new( - coords.into(), - geom_offsets.into_inner(), - None, - Default::default(), - ); + let array = + MultiPointArray::new(coords, geom_offsets.into_inner(), None, Default::default()); Ok(PyNativeArray::new(NativeArrayDyn::new(Arc::new(array)))) } PyCoordBuffer::ThreeD(coords) => { - let array = MultiPointArray::new( - coords.into(), - geom_offsets.into_inner(), - None, - Default::default(), - ); + let array = + MultiPointArray::new(coords, geom_offsets.into_inner(), None, Default::default()); Ok(PyNativeArray::new(NativeArrayDyn::new(Arc::new(array)))) } } @@ -114,7 +98,7 @@ pub fn multilinestrings( match coords { PyCoordBuffer::TwoD(coords) => { let array = MultiLineStringArray::new( - coords.into(), + coords, geom_offsets.into_inner(), ring_offsets.into_inner(), None, @@ -124,7 +108,7 @@ pub fn multilinestrings( } PyCoordBuffer::ThreeD(coords) => { let array = MultiLineStringArray::new( - coords.into(), + coords, geom_offsets.into_inner(), ring_offsets.into_inner(), None, @@ -145,7 +129,7 @@ pub fn multipolygons( match coords { PyCoordBuffer::TwoD(coords) => { let array = MultiPolygonArray::new( - coords.into(), + coords, geom_offsets.into_inner(), polygon_offsets.into_inner(), ring_offsets.into_inner(), @@ -156,7 +140,7 @@ pub fn multipolygons( } PyCoordBuffer::ThreeD(coords) => { let array = MultiPolygonArray::new( - coords.into(), + coords, geom_offsets.into_inner(), polygon_offsets.into_inner(), ring_offsets.into_inner(), From 2e67bf28c67d1e8a00cf9a67aa721ee135f7c066 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 7 Oct 2024 00:25:33 -0400 Subject: [PATCH 3/3] Explicitly check for list/tuple input --- python/geoarrow-compute/pyproject.toml | 2 +- python/geoarrow-core/pyproject.toml | 2 +- .../geoarrow/rust/core/_constructors.pyi | 32 +++++++++++ .../python/geoarrow/rust/core/_rust.pyi | 7 +++ python/geoarrow-io/pyproject.toml | 2 +- python/pyo3-geoarrow/src/coord_buffer.rs | 34 +++++------- python/tests/core/test_constructors.py | 55 +++++++++++++++++++ 7 files changed, 112 insertions(+), 22 deletions(-) create mode 100644 python/geoarrow-core/python/geoarrow/rust/core/_constructors.pyi create mode 100644 python/tests/core/test_constructors.py diff --git a/python/geoarrow-compute/pyproject.toml b/python/geoarrow-compute/pyproject.toml index 7a7b4d80..da2b7d1b 100644 --- a/python/geoarrow-compute/pyproject.toml +++ b/python/geoarrow-compute/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "maturin" [project] name = "geoarrow-rust-compute" requires-python = ">=3.8" -dependencies = ["arro3-core", "geoarrow-rust-core", "pyproj"] +dependencies = ["arro3-core>=0.4", "geoarrow-rust-core", "pyproj"] classifiers = [ "Programming Language :: Rust", "Programming Language :: Python :: Implementation :: CPython", diff --git a/python/geoarrow-core/pyproject.toml b/python/geoarrow-core/pyproject.toml index cd2d7151..ec908823 100644 --- a/python/geoarrow-core/pyproject.toml +++ b/python/geoarrow-core/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "maturin" [project] name = "geoarrow-rust-core" requires-python = ">=3.8" -dependencies = ["arro3-core", "pyproj"] +dependencies = ["arro3-core>=0.4", "pyproj"] classifiers = [ "Programming Language :: Rust", "Programming Language :: Python :: Implementation :: CPython", diff --git a/python/geoarrow-core/python/geoarrow/rust/core/_constructors.pyi b/python/geoarrow-core/python/geoarrow/rust/core/_constructors.pyi new file mode 100644 index 00000000..23ab05b2 --- /dev/null +++ b/python/geoarrow-core/python/geoarrow/rust/core/_constructors.pyi @@ -0,0 +1,32 @@ +from typing import List, Tuple + +from arro3.core.types import ArrayInput + +from ._rust import NativeArray + +def points( + coords: ArrayInput | Tuple[ArrayInput, ...] | List[ArrayInput], +) -> NativeArray: ... +def linestrings( + coords: ArrayInput | Tuple[ArrayInput, ...] | List[ArrayInput], + geom_offsets: ArrayInput, +) -> NativeArray: ... +def polygons( + coords: ArrayInput | Tuple[ArrayInput, ...] | List[ArrayInput], + geom_offsets: ArrayInput, + ring_offsets: ArrayInput, +) -> NativeArray: ... +def multipoints( + coords: ArrayInput | Tuple[ArrayInput, ...] | List[ArrayInput], +) -> NativeArray: ... +def multilinestrings( + coords: ArrayInput | Tuple[ArrayInput, ...] | List[ArrayInput], + geom_offsets: ArrayInput, + ring_offsets: ArrayInput, +) -> NativeArray: ... +def multipolygons( + coords: ArrayInput | Tuple[ArrayInput, ...] | List[ArrayInput], + geom_offsets: ArrayInput, + polygon_offsets: ArrayInput, + ring_offsets: ArrayInput, +) -> NativeArray: ... diff --git a/python/geoarrow-core/python/geoarrow/rust/core/_rust.pyi b/python/geoarrow-core/python/geoarrow/rust/core/_rust.pyi index 6fe700ca..335c62cc 100644 --- a/python/geoarrow-core/python/geoarrow/rust/core/_rust.pyi +++ b/python/geoarrow-core/python/geoarrow/rust/core/_rust.pyi @@ -32,6 +32,13 @@ except ImportError: from .enums import CoordType, Dimension from .types import CoordTypeT, DimensionT +from geoarrow.rust.core._constructors import points as points +from geoarrow.rust.core._constructors import linestrings as linestrings +from geoarrow.rust.core._constructors import polygons as polygons +from geoarrow.rust.core._constructors import multipoints as multipoints +from geoarrow.rust.core._constructors import multilinestrings as multilinestrings +from geoarrow.rust.core._constructors import multipolygons as multipolygons + class Geometry: """ An immutable geometry scalar using GeoArrow's in-memory representation. diff --git a/python/geoarrow-io/pyproject.toml b/python/geoarrow-io/pyproject.toml index 85f64d58..5882842b 100644 --- a/python/geoarrow-io/pyproject.toml +++ b/python/geoarrow-io/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "maturin" [project] name = "geoarrow-rust-io" requires-python = ">=3.8" -dependencies = ["arro3-core", "pyproj"] +dependencies = ["arro3-core>=0.4", "pyproj"] classifiers = [ "Programming Language :: Rust", "Programming Language :: Python :: Implementation :: CPython", diff --git a/python/pyo3-geoarrow/src/coord_buffer.rs b/python/pyo3-geoarrow/src/coord_buffer.rs index f7b57d14..5e8a1706 100644 --- a/python/pyo3-geoarrow/src/coord_buffer.rs +++ b/python/pyo3-geoarrow/src/coord_buffer.rs @@ -5,6 +5,7 @@ use arrow_schema::DataType; use geoarrow::array::{CoordBuffer, InterleavedCoordBuffer, SeparatedCoordBuffer}; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; +use pyo3::types::{PyList, PyTuple}; use pyo3_arrow::PyArray; pub enum PyCoordBuffer { @@ -14,8 +15,9 @@ pub enum PyCoordBuffer { impl<'py> FromPyObject<'py> for PyCoordBuffer { fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { - if let Ok(arrays) = ob.extract::>() { - // Separated arrays + if ob.is_instance_of::() || ob.is_instance_of::() { + let arrays = ob.extract::>()?; + if arrays.len() < 2 || arrays.len() > 3 { return Err(PyValueError::new_err(format!( "Expected 2 or 3 arrays for each dimension, got {}.", @@ -87,7 +89,9 @@ impl<'py> FromPyObject<'py> for PyCoordBuffer { SeparatedCoordBuffer::new([x.clone(), y.clone()]).into(), )) } - } else if let Ok(coords) = ob.extract::() { + } else { + let coords = ob.extract::()?; + match coords.field().data_type() { DataType::FixedSizeList(inner_field, list_size) => { if !matches!(inner_field.data_type(), DataType::Float64) { @@ -125,25 +129,17 @@ impl<'py> FromPyObject<'py> for PyCoordBuffer { 3 => Ok(Self::ThreeD( InterleavedCoordBuffer::<3>::new(values.values().clone()).into(), )), - _ => { - return Err(PyValueError::new_err(format!( - "Unsupported fixed size list size {}", - list_size - ))); - } + _ => Err(PyValueError::new_err(format!( + "Unsupported fixed size list size {}", + list_size + ))), } } - dt => { - return Err(PyValueError::new_err(format!( - "Expected coords to be FixedSizeList data type, got {}", - dt - ))); - } + dt => Err(PyValueError::new_err(format!( + "Expected coords to be FixedSizeList data type, got {}", + dt + ))), } - } else { - Err(PyValueError::new_err( - "Expected array-like or iterable of array-like for coordinate buffer.", - )) } } } diff --git a/python/tests/core/test_constructors.py b/python/tests/core/test_constructors.py new file mode 100644 index 00000000..8488815f --- /dev/null +++ b/python/tests/core/test_constructors.py @@ -0,0 +1,55 @@ +import numpy as np +import pyarrow as pa +import pytest +from geoarrow.rust.core import linestrings, points + + +def test_points_2d(): + coords = np.random.rand(10, 2) + point_arr = points(coords) + point_arr = pa.array(point_arr) + assert point_arr[0][0].as_py() == coords[0, 0] + assert point_arr[0][1].as_py() == coords[0, 1] + + coords_retour = point_arr.values.to_numpy().reshape(-1, 2) + assert np.allclose(coords, coords_retour) + + with pytest.raises(ValueError, match="Buffer is not C contiguous"): + points((coords[:, 0], coords[:, 1])) + + x = np.ascontiguousarray(coords[:, 0]) + y = np.ascontiguousarray(coords[:, 1]) + point_arr2 = points((x, y)) + point_arr2 = pa.array(point_arr2) + assert point_arr2[0][0].as_py() == coords[0, 0] + assert point_arr2[0][1].as_py() == coords[0, 1] + + coords_retour2 = np.column_stack( + [ + point_arr2.field("x"), + point_arr2.field("y"), + ] + ) + assert np.allclose(coords, coords_retour2) + + +def test_points_3d(): + coords = np.random.rand(10, 3) + point_arr = points(coords) + point_arr = pa.array(point_arr) + assert point_arr[0][0].as_py() == coords[0, 0] + assert point_arr[0][1].as_py() == coords[0, 1] + assert point_arr[0][2].as_py() == coords[0, 2] + + +def test_linestrings(): + coords = np.random.rand(10, 2) + geom_offsets = np.array([0, 2, 6, 10], dtype=np.int32) + geom_arr = linestrings(coords, geom_offsets) + geom_arr = pa.array(geom_arr) + assert len(geom_arr) == 3 + assert len(geom_arr[0]) == 2 + assert len(geom_arr[1]) == 4 + assert len(geom_arr[2]) == 4 + + assert np.allclose(coords, geom_arr.values.values.to_numpy().reshape(-1, 2))