diff --git a/.github/workflows/integration-ffi.yml b/.github/workflows/integration-ffi.yml index 8f0797eea12..3fce8f24fd8 100644 --- a/.github/workflows/integration-ffi.yml +++ b/.github/workflows/integration-ffi.yml @@ -38,6 +38,6 @@ jobs: python -m venv venv source venv/bin/activate - pip install maturin==0.10.2 toml==0.10.1 pyarrow==4.0.0 + pip install maturin==0.10.2 toml==0.10.1 pyarrow==5.0.0 maturin develop python -m unittest discover tests diff --git a/arrow-pyarrow-integration-testing/Cargo.toml b/arrow-pyarrow-integration-testing/Cargo.toml index 97a64a9c509..551868e30ea 100644 --- a/arrow-pyarrow-integration-testing/Cargo.toml +++ b/arrow-pyarrow-integration-testing/Cargo.toml @@ -17,13 +17,9 @@ [package] name = "arrow-pyarrow-integration-testing" -description = "" -version = "4.0.0-SNAPSHOT" -homepage = "https://github.com/apache/arrow-rs" -repository = "https://github.com/apache/arrow-rs" -authors = ["Apache Arrow "] +version = "0.0.0" +authors = ["Jorge C. Leitao ", "Apache Arrow "] license = "Apache-2.0" -keywords = [ "arrow" ] edition = "2018" [lib] @@ -31,8 +27,8 @@ name = "arrow_pyarrow_integration_testing" crate-type = ["cdylib"] [dependencies] -arrow2 = { path = "../", default-features = false, features = ["compute"] } -pyo3 = { version = "0.12.1", features = ["extension-module"] } +arrow2 = { path = "../", default-features = false } +pyo3 = { version = "0.14", features = ["extension-module"] } [package.metadata.maturin] requires-dist = ["pyarrow>=1"] diff --git a/arrow-pyarrow-integration-testing/src/lib.rs b/arrow-pyarrow-integration-testing/src/lib.rs index eaf08b5834f..0f6bdf8e2f7 100644 --- a/arrow-pyarrow-integration-testing/src/lib.rs +++ b/arrow-pyarrow-integration-testing/src/lib.rs @@ -1,20 +1,3 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - //! This library demonstrates a minimal usage of Rust's C data interface to pass //! arrays from and to Python. @@ -23,18 +6,11 @@ use std::fmt; use std::sync::Arc; use pyo3::exceptions::PyOSError; +use pyo3::ffi::Py_uintptr_t; +use pyo3::prelude::*; use pyo3::wrap_pyfunction; -use pyo3::{libc::uintptr_t, prelude::*}; - -use arrow2::array::{Array, Int64Array}; -use arrow2::ffi; -use arrow2::{array::PrimitiveArray, compute}; -use arrow2::{ - datatypes::{DataType, Field}, - error::ArrowError, -}; -type ArrayRef = Arc; +use arrow2::{array::Array, datatypes::Field, error::ArrowError, ffi}; /// an error that bridges ArrowError with a Python error #[derive(Debug)] @@ -73,7 +49,7 @@ impl From for PyErr { } } -fn to_rust(ob: PyObject, py: Python) -> PyResult { +fn to_rust_array(ob: PyObject, py: Python) -> PyResult> { // prepare a pointer to receive the Array struct let array = Box::new(ffi::Ffi_ArrowArray::empty()); let schema = Box::new(ffi::Ffi_ArrowSchema::empty()); @@ -86,7 +62,7 @@ fn to_rust(ob: PyObject, py: Python) -> PyResult { ob.call_method1( py, "_export_to_c", - (array_ptr as uintptr_t, schema_ptr as uintptr_t), + (array_ptr as Py_uintptr_t, schema_ptr as Py_uintptr_t), )?; let field = ffi::import_field_from_c(schema.as_ref()).map_err(PyO3ArrowError::from)?; @@ -95,7 +71,7 @@ fn to_rust(ob: PyObject, py: Python) -> PyResult { Ok(array.into()) } -fn to_py(array: ArrayRef, py: Python) -> PyResult { +fn to_py_array(array: Arc, py: Python) -> PyResult { let array_ptr = Box::new(ffi::Ffi_ArrowArray::empty()); let schema_ptr = Box::new(ffi::Ffi_ArrowSchema::empty()); @@ -111,7 +87,7 @@ fn to_py(array: ArrayRef, py: Python) -> PyResult { let array = pa.getattr("Array")?.call_method1( "_import_from_c", - (array_ptr as uintptr_t, schema_ptr as uintptr_t), + (array_ptr as Py_uintptr_t, schema_ptr as Py_uintptr_t), )?; unsafe { @@ -122,99 +98,58 @@ fn to_py(array: ArrayRef, py: Python) -> PyResult { Ok(array.to_object(py)) } -/// Returns `array + array` of an int64 array. -#[pyfunction] -fn double(array: PyObject, py: Python) -> PyResult { - // import - let array = to_rust(array, py)?; - - // perform some operation - let array = array.as_any().downcast_ref::().ok_or_else(|| { - PyO3ArrowError::ArrowError(ArrowError::Ffi("Expects an int64".to_string())) - })?; - let array = - compute::arithmetics::basic::add::add(array, array).map_err(PyO3ArrowError::from)?; - let array = Arc::new(array); - - // export - to_py(array, py) -} - -/// calls a lambda function that receives and returns an array -/// whose result must be the array multiplied by two -#[pyfunction] -fn double_py(lambda: PyObject, py: Python) -> PyResult { - // create - let array = Arc::new(PrimitiveArray::::from(vec![Some(1), None, Some(3)])); - let expected = Arc::new(PrimitiveArray::::from(vec![Some(2), None, Some(6)])) as ArrayRef; +fn to_rust_field(ob: PyObject, py: Python) -> PyResult { + // prepare a pointer to receive the Array struct + let schema = Box::new(ffi::Ffi_ArrowSchema::empty()); - // to py - let array = to_py(array, py)?; + let schema_ptr = &*schema as *const ffi::Ffi_ArrowSchema; - let array = lambda.call1(py, (array,))?; + // make the conversion through PyArrow's private API + // this changes the pointer's memory and is thus unsafe. In particular, `_export_to_c` can go out of bounds + ob.call_method1(py, "_export_to_c", (schema_ptr as Py_uintptr_t,))?; - let array = to_rust(array, py)?; + let field = ffi::import_field_from_c(schema.as_ref()).map_err(PyO3ArrowError::from)?; - Ok(array == expected) + Ok(field) } -/// Returns the substring -#[pyfunction] -fn substring(array: PyObject, start: i64, py: Python) -> PyResult { - // import - let array = to_rust(array, py)?; +fn to_py_field(field: &Field, py: Python) -> PyResult { + let schema_ptr = Box::new(ffi::Ffi_ArrowSchema::empty()); + let schema_ptr = Box::into_raw(schema_ptr); - // substring - let array = compute::substring::substring(array.as_ref(), start, &None) - .map_err(PyO3ArrowError::from)? - .into(); + unsafe { + ffi::export_field_to_c(field, schema_ptr); + }; - // export - to_py(array, py) -} + let pa = py.import("pyarrow")?; -/// Returns the concatenate -#[pyfunction] -fn concatenate(array: PyObject, py: Python) -> PyResult { - // import - let array = to_rust(array, py)?; + let array = pa + .getattr("Field")? + .call_method1("_import_from_c", (schema_ptr as Py_uintptr_t,))?; - // concat - let array = compute::concat::concatenate(&[array.as_ref(), array.as_ref()]) - .map_err(PyO3ArrowError::from)? - .into(); + unsafe { Box::from_raw(schema_ptr) }; - // export - to_py(array, py) + Ok(array.to_object(py)) } /// Converts to rust and back to python #[pyfunction] -fn round_trip(array: PyObject, py: Python) -> PyResult { +fn round_trip_array(array: PyObject, py: Python) -> PyResult { // import - let array = to_rust(array, py)?; + let array = to_rust_array(array, py)?; // export - to_py(array, py) + to_py_array(array, py) } /// Converts to rust and back to python #[pyfunction] -fn import_primitive(array: PyObject, py: Python) -> PyResult { - let array = to_rust(array, py)?; - let expected = Arc::new(PrimitiveArray::::from(vec![Some(2), None, Some(6)])) as ArrayRef; - - Ok(array == expected) -} - -/// Converts to rust and back to python -#[pyfunction] -fn export_primitive(py: Python) -> PyResult { - let array = Arc::new(PrimitiveArray::::from(vec![Some(2), None, Some(6)])) as ArrayRef; - - let array = to_py(array, py)?; +fn round_trip_field(array: PyObject, py: Python) -> PyResult { + // import + let field = to_rust_field(array, py)?; - Ok(array) + // export + to_py_field(&field, py) } #[pyfunction] @@ -224,13 +159,8 @@ fn total_allocated_bytes() -> PyResult { #[pymodule] fn arrow_pyarrow_integration_testing(_py: Python, m: &PyModule) -> PyResult<()> { - m.add_wrapped(wrap_pyfunction!(total_allocated_bytes))?; - m.add_wrapped(wrap_pyfunction!(export_primitive))?; - m.add_wrapped(wrap_pyfunction!(import_primitive))?; - m.add_wrapped(wrap_pyfunction!(double))?; - m.add_wrapped(wrap_pyfunction!(double_py))?; - m.add_wrapped(wrap_pyfunction!(substring))?; - m.add_wrapped(wrap_pyfunction!(concatenate))?; - m.add_wrapped(wrap_pyfunction!(round_trip))?; + m.add_function(wrap_pyfunction!(total_allocated_bytes, m)?)?; + m.add_function(wrap_pyfunction!(round_trip_array, m)?)?; + m.add_function(wrap_pyfunction!(round_trip_field, m)?)?; Ok(()) } diff --git a/arrow-pyarrow-integration-testing/tests/test_sql.py b/arrow-pyarrow-integration-testing/tests/test_sql.py index d891eb27b90..e28834b1da2 100644 --- a/arrow-pyarrow-integration-testing/tests/test_sql.py +++ b/arrow-pyarrow-integration-testing/tests/test_sql.py @@ -40,60 +40,12 @@ def tearDown(self): # No leak of C++ memory self.assertEqual(self.old_allocated_cpp, pyarrow.total_allocated_bytes()) - def test_primitive_python(self): - """ - Python -> Rust -> Python - """ - a = pyarrow.array([1, 2, 3]) - b = arrow_pyarrow_integration_testing.double(a) - self.assertEqual(b, pyarrow.array([2, 4, 6])) - - def test_primitive_rust(self): - """ - Rust -> Python -> Rust - """ - - def double(array): - array = array.to_pylist() - return pyarrow.array([x * 2 if x is not None else None for x in array]) - - is_correct = arrow_pyarrow_integration_testing.double_py(double) - self.assertTrue(is_correct) - - def test_import_primitive(self): - """ - Python -> Rust - """ - old_allocated = pyarrow.total_allocated_bytes() - - a = pyarrow.array([2, None, 6]) - - is_correct = arrow_pyarrow_integration_testing.import_primitive(a) - self.assertTrue(is_correct) - # No leak of C++ memory - del a - self.assertEqual(old_allocated, pyarrow.total_allocated_bytes()) - - def test_export_primitive(self): - """ - Python -> Rust - """ - old_allocated = pyarrow.total_allocated_bytes() - - expected = pyarrow.array([2, None, 6]) - - result = arrow_pyarrow_integration_testing.export_primitive() - self.assertEqual(expected, result) - # No leak of C++ memory - del expected - self.assertEqual(old_allocated, pyarrow.total_allocated_bytes()) - def test_string_roundtrip(self): """ Python -> Rust -> Python """ a = pyarrow.array(["a", None, "ccc"]) - b = arrow_pyarrow_integration_testing.round_trip(a) + b = arrow_pyarrow_integration_testing.round_trip_array(a) c = pyarrow.array(["a", None, "ccc"]) self.assertEqual(b, c) @@ -107,26 +59,9 @@ def test_decimal_roundtrip(self): None, ] a = pyarrow.array(data, pyarrow.decimal128(5, 2)) - b = arrow_pyarrow_integration_testing.round_trip(a) + b = arrow_pyarrow_integration_testing.round_trip_array(a) self.assertEqual(a, b) - def test_string_python(self): - """ - Python -> Rust -> Python - """ - a = pyarrow.array(["a", None, "ccc"]) - b = arrow_pyarrow_integration_testing.substring(a, 1) - self.assertEqual(b, pyarrow.array(["", None, "cc"])) - - def test_time32_python(self): - """ - Python -> Rust -> Python - """ - a = pyarrow.array([None, 1, 2], pyarrow.time32("s")) - b = arrow_pyarrow_integration_testing.concatenate(a) - expected = pyarrow.array([None, 1, 2] + [None, 1, 2], pyarrow.time32("s")) - self.assertEqual(b, expected) - def test_list_array(self): """ Python -> Rust -> Python @@ -135,7 +70,7 @@ def test_list_array(self): a = pyarrow.array( [[], None, [1, 2], [4, 5, 6]], pyarrow.list_(pyarrow.int64()) ) - b = arrow_pyarrow_integration_testing.round_trip(a) + b = arrow_pyarrow_integration_testing.round_trip_array(a) b.validate(full=True) assert a.to_pylist() == b.to_pylist() @@ -159,7 +94,7 @@ def test_struct_array(self): ], pyarrow.struct(fields), ) - b = arrow_pyarrow_integration_testing.round_trip(a) + b = arrow_pyarrow_integration_testing.round_trip_array(a) b.validate(full=True) assert a.to_pylist() == b.to_pylist() @@ -174,7 +109,7 @@ def test_list_list_array(self): [[None], None, [[1], [2]], [[4, 5], [6]]], pyarrow.list_(pyarrow.list_(pyarrow.int64())), ) - b = arrow_pyarrow_integration_testing.round_trip(a) + b = arrow_pyarrow_integration_testing.round_trip_array(a) b.validate(full=True) assert a.to_pylist() == b.to_pylist() @@ -188,7 +123,7 @@ def test_dict(self): ["a", "a", "b", None, "c"], pyarrow.dictionary(pyarrow.int64(), pyarrow.utf8()), ) - b = arrow_pyarrow_integration_testing.round_trip(a) + b = arrow_pyarrow_integration_testing.round_trip_array(a) b.validate(full=True) assert a.to_pylist() == b.to_pylist() @@ -205,7 +140,7 @@ def test_sparse_union(self): pyarrow.array([0, 1, 2, None, 0], pyarrow.int64()), ], ) - b = arrow_pyarrow_integration_testing.round_trip(a) + b = arrow_pyarrow_integration_testing.round_trip_array(a) b.validate(full=True) assert a.to_pylist() == b.to_pylist() @@ -223,8 +158,24 @@ def test_dense_union(self): pyarrow.array([0, 1, 2, None, 0], pyarrow.int64()), ], ) - b = arrow_pyarrow_integration_testing.round_trip(a) + b = arrow_pyarrow_integration_testing.round_trip_array(a) b.validate(full=True) assert a.to_pylist() == b.to_pylist() assert a.type == b.type + + def test_field(self): + field = pyarrow.field("aa", pyarrow.bool_()) + result = arrow_pyarrow_integration_testing.round_trip_field(field) + assert field == result + + def test_field_nested(self): + field = pyarrow.field("aa", pyarrow.list_(pyarrow.field("ab", pyarrow.bool_()))) + result = arrow_pyarrow_integration_testing.round_trip_field(field) + assert field == result + + def test_field_metadata(self): + field = pyarrow.field("aa", pyarrow.bool_(), {"a": "b"}) + result = arrow_pyarrow_integration_testing.round_trip_field(field) + assert field == result + assert field.metadata == result.metadata