From b5c9bb2ad9ab0a89168bcf0730d2307cd82feeea Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Fri, 29 Mar 2024 11:56:31 +1100 Subject: [PATCH 1/9] Update py03 from 0.20 to 0.21 --- arrow/Cargo.toml | 2 +- arrow/src/pyarrow.rs | 38 +++++++++++++++++--------------------- arrow/tests/pyarrow.rs | 4 ++-- 3 files changed, 20 insertions(+), 24 deletions(-) diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 4f7fda9b8075..82529fb4ac3d 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -54,7 +54,7 @@ arrow-select = { workspace = true } arrow-string = { workspace = true } rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true } -pyo3 = { version = "0.20", default-features = false, optional = true } +pyo3 = { version = "0.21", default-features = false, optional = true } [package.metadata.docs.rs] features = ["prettyprint", "ipc_compression", "ffi", "pyarrow"] diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 39702ce01aea..0efd85597c3c 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -102,8 +102,8 @@ impl IntoPyArrow for T { } fn validate_class(expected: &str, value: &PyAny) -> PyResult<()> { - let pyarrow = PyModule::import(value.py(), "pyarrow")?; - let class = pyarrow.getattr(expected)?; + let pyarrow = PyModule::import_bound(value.py(), "pyarrow")?; + let class = pyarrow.getattr(expected)?.into_gil_ref(); // TODO if !value.is_instance(class)? { let expected_module = class.getattr("__module__")?.extract::<&str>()?; let expected_name = class.getattr("__name__")?.extract::<&str>()?; @@ -143,8 +143,7 @@ impl FromPyArrow for DataType { // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_schema__")? { - let capsule: &PyCapsule = - PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?; + let capsule: &PyCapsule = value.getattr("__arrow_c_schema__")?.call0()?.downcast()?; validate_pycapsule(capsule, "arrow_schema")?; let schema_ptr = unsafe { capsule.reference::() }; @@ -166,7 +165,7 @@ impl ToPyArrow for DataType { fn to_pyarrow(&self, py: Python) -> PyResult { let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?; let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - let module = py.import("pyarrow")?; + let module = py.import_bound("pyarrow")?; let class = module.getattr("DataType")?; let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?; Ok(dtype.into()) @@ -179,8 +178,7 @@ impl FromPyArrow for Field { // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_schema__")? { - let capsule: &PyCapsule = - PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?; + let capsule: &PyCapsule = value.getattr("__arrow_c_schema__")?.call0()?.downcast()?; validate_pycapsule(capsule, "arrow_schema")?; let schema_ptr = unsafe { capsule.reference::() }; @@ -202,7 +200,7 @@ impl ToPyArrow for Field { fn to_pyarrow(&self, py: Python) -> PyResult { let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?; let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - let module = py.import("pyarrow")?; + let module = py.import_bound("pyarrow")?; let class = module.getattr("Field")?; let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?; Ok(dtype.into()) @@ -215,8 +213,7 @@ impl FromPyArrow for Schema { // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_schema__")? { - let capsule: &PyCapsule = - PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?; + let capsule: &PyCapsule = value.getattr("__arrow_c_schema__")?.call0()?.downcast()?; validate_pycapsule(capsule, "arrow_schema")?; let schema_ptr = unsafe { capsule.reference::() }; @@ -238,7 +235,7 @@ impl ToPyArrow for Schema { fn to_pyarrow(&self, py: Python) -> PyResult { let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?; let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - let module = py.import("pyarrow")?; + let module = py.import_bound("pyarrow")?; let class = module.getattr("Schema")?; let schema = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?; Ok(schema.into()) @@ -259,8 +256,8 @@ impl FromPyArrow for ArrayData { )); } - let schema_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(0)?)?; - let array_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(1)?)?; + let schema_capsule: &PyCapsule = tuple.get_item(0)?.downcast()?; + let array_capsule: &PyCapsule = tuple.get_item(1)?.downcast()?; validate_pycapsule(schema_capsule, "arrow_schema")?; validate_pycapsule(array_capsule, "arrow_array")?; @@ -296,7 +293,7 @@ impl ToPyArrow for ArrayData { let array = FFI_ArrowArray::new(self); let schema = FFI_ArrowSchema::try_from(self.data_type()).map_err(to_py_err)?; - let module = py.import("pyarrow")?; + let module = py.import_bound("pyarrow")?; let class = module.getattr("Array")?; let array = class.call_method1( "_import_from_c", @@ -340,8 +337,8 @@ impl FromPyArrow for RecordBatch { )); } - let schema_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(0)?)?; - let array_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(1)?)?; + let schema_capsule: &PyCapsule = tuple.get_item(0)?.downcast()?; + let array_capsule: &PyCapsule = tuple.get_item(1)?.downcast()?; validate_pycapsule(schema_capsule, "arrow_schema")?; validate_pycapsule(array_capsule, "arrow_array")?; @@ -400,8 +397,7 @@ impl FromPyArrow for ArrowArrayStreamReader { // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_stream__")? { - let capsule: &PyCapsule = - PyTryInto::try_into(value.getattr("__arrow_c_stream__")?.call0()?)?; + let capsule: &PyCapsule = value.getattr("__arrow_c_stream__")?.call0()?.downcast()?; validate_pycapsule(capsule, "arrow_array_stream")?; let stream = unsafe { FFI_ArrowArrayStream::from_raw(capsule.pointer() as _) }; @@ -421,7 +417,7 @@ impl FromPyArrow for ArrowArrayStreamReader { // make the conversion through PyArrow's private API // this changes the pointer's memory and is thus unsafe. // In particular, `_export_to_c` can go out of bounds - let args = PyTuple::new(value.py(), [stream_ptr as Py_uintptr_t]); + let args = PyTuple::new_bound(value.py(), [stream_ptr as Py_uintptr_t]); value.call_method1("_export_to_c", args)?; let stream_reader = ArrowArrayStreamReader::try_new(stream) @@ -439,9 +435,9 @@ impl IntoPyArrow for Box { let mut stream = FFI_ArrowArrayStream::new(self); let stream_ptr = (&mut stream) as *mut FFI_ArrowArrayStream; - let module = py.import("pyarrow")?; + let module = py.import_bound("pyarrow")?; let class = module.getattr("RecordBatchReader")?; - let args = PyTuple::new(py, [stream_ptr as Py_uintptr_t]); + let args = PyTuple::new_bound(py, [stream_ptr as Py_uintptr_t]); let reader = class.call_method1("_import_from_c", args)?; Ok(PyObject::from(reader)) diff --git a/arrow/tests/pyarrow.rs b/arrow/tests/pyarrow.rs index 4b6991da0063..a9cefc2b72f7 100644 --- a/arrow/tests/pyarrow.rs +++ b/arrow/tests/pyarrow.rs @@ -32,9 +32,9 @@ fn test_to_pyarrow() { let res = Python::with_gil(|py| { let py_input = input.to_pyarrow(py)?; - let records = RecordBatch::from_pyarrow(py_input.as_ref(py))?; + let records = RecordBatch::from_pyarrow(py_input.bind(py).as_gil_ref())?; // TODO let py_records = records.to_pyarrow(py)?; - RecordBatch::from_pyarrow(py_records.as_ref(py)) + RecordBatch::from_pyarrow(py_records.bind(py).as_gil_ref()) // TODO }) .unwrap(); From 5f639061d4880716e458fe1bcce70495054a40f3 Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Fri, 29 Mar 2024 12:13:04 +1100 Subject: [PATCH 2/9] Bump pyo3 in arrow-pyarrow-integration-testing --- arrow-pyarrow-integration-testing/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-pyarrow-integration-testing/Cargo.toml b/arrow-pyarrow-integration-testing/Cargo.toml index 8c60c086c29a..129d0eca3907 100644 --- a/arrow-pyarrow-integration-testing/Cargo.toml +++ b/arrow-pyarrow-integration-testing/Cargo.toml @@ -34,4 +34,4 @@ crate-type = ["cdylib"] [dependencies] arrow = { path = "../arrow", features = ["pyarrow"] } -pyo3 = { version = "0.20", features = ["extension-module"] } +pyo3 = { version = "0.21", features = ["extension-module"] } From cecc24a67d05690323e9ef1c85f6060afcbdabb9 Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Sun, 31 Mar 2024 14:39:37 +1100 Subject: [PATCH 3/9] Update pyarrow API to align with pyo3 0.21 changes --- arrow/src/pyarrow.rs | 78 ++++++++++++++++++++++++++---------------- arrow/tests/pyarrow.rs | 4 +-- 2 files changed, 50 insertions(+), 32 deletions(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 0efd85597c3c..8d768c83015b 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -82,7 +82,12 @@ fn to_py_err(err: ArrowError) -> PyErr { } pub trait FromPyArrow: Sized { - fn from_pyarrow(value: &PyAny) -> PyResult; + #[deprecated(since = "52.0.0", note = "Use from_pyarrow_bound")] + fn from_pyarrow(value: &PyAny) -> PyResult { + Self::from_pyarrow_bound(&value.as_borrowed()) + } + + fn from_pyarrow_bound(value: &Bound) -> PyResult; } /// Create a new PyArrow object from a arrow-rs type. @@ -101,15 +106,19 @@ impl IntoPyArrow for T { } } -fn validate_class(expected: &str, value: &PyAny) -> PyResult<()> { +fn validate_class(expected: &str, value: &Bound) -> PyResult<()> { let pyarrow = PyModule::import_bound(value.py(), "pyarrow")?; - let class = pyarrow.getattr(expected)?.into_gil_ref(); // TODO - if !value.is_instance(class)? { - let expected_module = class.getattr("__module__")?.extract::<&str>()?; - let expected_name = class.getattr("__name__")?.extract::<&str>()?; + let class = pyarrow.getattr(expected)?; + if !value.as_ref().is_instance(class.as_ref())? { + let expected_module = class.getattr("__module__")?; + let expected_module = expected_module.extract::<&str>()?; + let expected_name = class.getattr("__name__")?; + let expected_name = expected_name.extract::<&str>()?; let found_class = value.get_type(); - let found_module = found_class.getattr("__module__")?.extract::<&str>()?; - let found_name = found_class.getattr("__name__")?.extract::<&str>()?; + let found_module = found_class.getattr("__module__")?; + let found_module = found_module.extract::<&str>()?; + let found_name = found_class.getattr("__name__")?; + let found_name = found_name.extract::<&str>()?; return Err(PyTypeError::new_err(format!( "Expected instance of {}.{}, got {}.{}", expected_module, expected_name, found_module, found_name @@ -118,7 +127,7 @@ fn validate_class(expected: &str, value: &PyAny) -> PyResult<()> { Ok(()) } -fn validate_pycapsule(capsule: &PyCapsule, name: &str) -> PyResult<()> { +fn validate_pycapsule(capsule: &Bound, name: &str) -> PyResult<()> { let capsule_name = capsule.name()?; if capsule_name.is_none() { return Err(PyValueError::new_err( @@ -138,12 +147,13 @@ fn validate_pycapsule(capsule: &PyCapsule, name: &str) -> PyResult<()> { } impl FromPyArrow for DataType { - fn from_pyarrow(value: &PyAny) -> PyResult { + fn from_pyarrow_bound(value: &Bound) -> PyResult { // Newer versions of PyArrow as well as other libraries with Arrow data implement this // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_schema__")? { - let capsule: &PyCapsule = value.getattr("__arrow_c_schema__")?.call0()?.downcast()?; + let capsule = value.getattr("__arrow_c_schema__")?.call0()?; + let capsule = capsule.downcast::()?; validate_pycapsule(capsule, "arrow_schema")?; let schema_ptr = unsafe { capsule.reference::() }; @@ -173,12 +183,13 @@ impl ToPyArrow for DataType { } impl FromPyArrow for Field { - fn from_pyarrow(value: &PyAny) -> PyResult { + fn from_pyarrow_bound(value: &Bound) -> PyResult { // Newer versions of PyArrow as well as other libraries with Arrow data implement this // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_schema__")? { - let capsule: &PyCapsule = value.getattr("__arrow_c_schema__")?.call0()?.downcast()?; + let capsule = value.getattr("__arrow_c_schema__")?.call0()?; + let capsule = capsule.downcast::()?; validate_pycapsule(capsule, "arrow_schema")?; let schema_ptr = unsafe { capsule.reference::() }; @@ -208,12 +219,13 @@ impl ToPyArrow for Field { } impl FromPyArrow for Schema { - fn from_pyarrow(value: &PyAny) -> PyResult { + fn from_pyarrow_bound(value: &Bound) -> PyResult { // Newer versions of PyArrow as well as other libraries with Arrow data implement this // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_schema__")? { - let capsule: &PyCapsule = value.getattr("__arrow_c_schema__")?.call0()?.downcast()?; + let capsule = value.getattr("__arrow_c_schema__")?.call0()?; + let capsule = capsule.downcast::()?; validate_pycapsule(capsule, "arrow_schema")?; let schema_ptr = unsafe { capsule.reference::() }; @@ -243,7 +255,7 @@ impl ToPyArrow for Schema { } impl FromPyArrow for ArrayData { - fn from_pyarrow(value: &PyAny) -> PyResult { + fn from_pyarrow_bound(value: &Bound) -> PyResult { // Newer versions of PyArrow as well as other libraries with Arrow data implement this // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html @@ -256,8 +268,10 @@ impl FromPyArrow for ArrayData { )); } - let schema_capsule: &PyCapsule = tuple.get_item(0)?.downcast()?; - let array_capsule: &PyCapsule = tuple.get_item(1)?.downcast()?; + let schema_capsule = tuple.get_item(0)?; + let schema_capsule = schema_capsule.downcast::()?; + let array_capsule = tuple.get_item(1)?; + let array_capsule = array_capsule.downcast::()?; validate_pycapsule(schema_capsule, "arrow_schema")?; validate_pycapsule(array_capsule, "arrow_array")?; @@ -307,9 +321,9 @@ impl ToPyArrow for ArrayData { } impl FromPyArrow for Vec { - fn from_pyarrow(value: &PyAny) -> PyResult { + fn from_pyarrow_bound(value: &Bound) -> PyResult { let list = value.downcast::()?; - list.iter().map(|x| T::from_pyarrow(x)).collect() + list.iter().map(|x| T::from_pyarrow_bound(&x)).collect() } } @@ -324,7 +338,7 @@ impl ToPyArrow for Vec { } impl FromPyArrow for RecordBatch { - fn from_pyarrow(value: &PyAny) -> PyResult { + fn from_pyarrow_bound(value: &Bound) -> PyResult { // Newer versions of PyArrow as well as other libraries with Arrow data implement this // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html @@ -337,8 +351,10 @@ impl FromPyArrow for RecordBatch { )); } - let schema_capsule: &PyCapsule = tuple.get_item(0)?.downcast()?; - let array_capsule: &PyCapsule = tuple.get_item(1)?.downcast()?; + let schema_capsule = tuple.get_item(0)?; + let schema_capsule = schema_capsule.downcast::()?; + let array_capsule = tuple.get_item(1)?; + let array_capsule = array_capsule.downcast::()?; validate_pycapsule(schema_capsule, "arrow_schema")?; validate_pycapsule(array_capsule, "arrow_array")?; @@ -367,12 +383,13 @@ impl FromPyArrow for RecordBatch { validate_class("RecordBatch", value)?; // TODO(kszucs): implement the FFI conversions in arrow-rs for RecordBatches let schema = value.getattr("schema")?; - let schema = Arc::new(Schema::from_pyarrow(schema)?); + let schema = Arc::new(Schema::from_pyarrow_bound(&schema)?); - let arrays = value.getattr("columns")?.downcast::()?; + let arrays = value.getattr("columns")?; let arrays = arrays + .downcast::()? .iter() - .map(|a| Ok(make_array(ArrayData::from_pyarrow(a)?))) + .map(|a| Ok(make_array(ArrayData::from_pyarrow_bound(&a)?))) .collect::>()?; let batch = RecordBatch::try_new(schema, arrays).map_err(to_py_err)?; @@ -392,12 +409,13 @@ impl ToPyArrow for RecordBatch { /// Supports conversion from `pyarrow.RecordBatchReader` to [ArrowArrayStreamReader]. impl FromPyArrow for ArrowArrayStreamReader { - fn from_pyarrow(value: &PyAny) -> PyResult { + fn from_pyarrow_bound(value: &Bound) -> PyResult { // Newer versions of PyArrow as well as other libraries with Arrow data implement this // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_stream__")? { - let capsule: &PyCapsule = value.getattr("__arrow_c_stream__")?.call0()?.downcast()?; + let capsule = value.getattr("__arrow_c_schema__")?.call0()?; + let capsule = capsule.downcast::()?; validate_pycapsule(capsule, "arrow_array_stream")?; let stream = unsafe { FFI_ArrowArrayStream::from_raw(capsule.pointer() as _) }; @@ -459,8 +477,8 @@ impl IntoPyArrow for ArrowArrayStreamReader { pub struct PyArrowType(pub T); impl<'source, T: FromPyArrow> FromPyObject<'source> for PyArrowType { - fn extract(value: &'source PyAny) -> PyResult { - Ok(Self(T::from_pyarrow(value)?)) + fn extract_bound(value: &Bound<'source, PyAny>) -> PyResult { + Ok(Self(T::from_pyarrow_bound(value)?)) } } diff --git a/arrow/tests/pyarrow.rs b/arrow/tests/pyarrow.rs index a9cefc2b72f7..a1c365c31798 100644 --- a/arrow/tests/pyarrow.rs +++ b/arrow/tests/pyarrow.rs @@ -32,9 +32,9 @@ fn test_to_pyarrow() { let res = Python::with_gil(|py| { let py_input = input.to_pyarrow(py)?; - let records = RecordBatch::from_pyarrow(py_input.bind(py).as_gil_ref())?; // TODO + let records = RecordBatch::from_pyarrow_bound(py_input.bind(py))?; let py_records = records.to_pyarrow(py)?; - RecordBatch::from_pyarrow(py_records.bind(py).as_gil_ref()) // TODO + RecordBatch::from_pyarrow_bound(py_records.bind(py)) }) .unwrap(); From 646052b5002e28109f6897e4f9896372f4cfa497 Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Sun, 31 Mar 2024 16:37:18 +1100 Subject: [PATCH 4/9] Fix arrow-pyarrow-integration-testing clippy --- arrow-pyarrow-integration-testing/src/lib.rs | 25 +++++++------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/arrow-pyarrow-integration-testing/src/lib.rs b/arrow-pyarrow-integration-testing/src/lib.rs index a53447b53c31..918fa74e3083 100644 --- a/arrow-pyarrow-integration-testing/src/lib.rs +++ b/arrow-pyarrow-integration-testing/src/lib.rs @@ -40,9 +40,9 @@ fn to_py_err(err: ArrowError) -> PyErr { /// Returns `array + array` of an int64 array. #[pyfunction] -fn double(array: &PyAny, py: Python) -> PyResult { +fn double(array: &Bound, py: Python) -> PyResult { // import - let array = make_array(ArrayData::from_pyarrow(array)?); + let array = make_array(ArrayData::from_pyarrow_bound(&array)?); // perform some operation let array = array @@ -60,7 +60,7 @@ fn double(array: &PyAny, py: Python) -> PyResult { /// calls a lambda function that receives and returns an array /// whose result must be the array multiplied by two #[pyfunction] -fn double_py(lambda: &PyAny, py: Python) -> PyResult { +fn double_py(lambda: &Bound, py: Python) -> PyResult { // create let array = Arc::new(Int64Array::from(vec![Some(1), None, Some(3)])); let expected = Arc::new(Int64Array::from(vec![Some(2), None, Some(6)])) as ArrayRef; @@ -68,7 +68,7 @@ fn double_py(lambda: &PyAny, py: Python) -> PyResult { // to py let pyarray = array.to_data().to_pyarrow(py)?; let pyarray = lambda.call1((pyarray,))?; - let array = make_array(ArrayData::from_pyarrow(pyarray)?); + let array = make_array(ArrayData::from_pyarrow_bound(&pyarray)?); Ok(array == expected) } @@ -82,16 +82,12 @@ fn make_empty_array(datatype: PyArrowType, py: Python) -> PyResult, - start: i64, -) -> PyResult> { +fn substring(array: PyArrowType, start: i64) -> PyResult> { // import let array = make_array(array.0); // substring - let array = - kernels::substring::substring(array.as_ref(), start, None).map_err(to_py_err)?; + let array = kernels::substring::substring(array.as_ref(), start, None).map_err(to_py_err)?; Ok(array.to_data().into()) } @@ -102,8 +98,7 @@ fn concatenate(array: PyArrowType, py: Python) -> PyResult let array = make_array(array.0); // concat - let array = - kernels::concat::concat(&[array.as_ref(), array.as_ref()]).map_err(to_py_err)?; + let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).map_err(to_py_err)?; array.to_data().to_pyarrow(py) } @@ -129,9 +124,7 @@ fn round_trip_array(obj: PyArrowType) -> PyResult, -) -> PyResult> { +fn round_trip_record_batch(obj: PyArrowType) -> PyResult> { Ok(obj) } @@ -168,7 +161,7 @@ fn boxed_reader_roundtrip( } #[pymodule] -fn arrow_pyarrow_integration_testing(_py: Python, m: &PyModule) -> PyResult<()> { +fn arrow_pyarrow_integration_testing(_py: Python, m: &Bound) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(double))?; m.add_wrapped(wrap_pyfunction!(double_py))?; m.add_wrapped(wrap_pyfunction!(make_empty_array))?; From 3a1956f550998ae914385fe0cd79a31386ef0659 Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Sun, 31 Mar 2024 17:05:36 +1100 Subject: [PATCH 5/9] Minor --- arrow/src/pyarrow.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 8d768c83015b..da574190d147 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -109,7 +109,7 @@ impl IntoPyArrow for T { fn validate_class(expected: &str, value: &Bound) -> PyResult<()> { let pyarrow = PyModule::import_bound(value.py(), "pyarrow")?; let class = pyarrow.getattr(expected)?; - if !value.as_ref().is_instance(class.as_ref())? { + if !value.is_instance(&class)? { let expected_module = class.getattr("__module__")?; let expected_module = expected_module.extract::<&str>()?; let expected_name = class.getattr("__name__")?; From b1f5c495f44e116533f9f1ebe4883d46e7a5dc41 Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Sun, 31 Mar 2024 17:46:21 +1100 Subject: [PATCH 6/9] Fix typo --- arrow/src/pyarrow.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index da574190d147..c3180e8794b9 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -414,7 +414,7 @@ impl FromPyArrow for ArrowArrayStreamReader { // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_stream__")? { - let capsule = value.getattr("__arrow_c_schema__")?.call0()?; + let capsule = value.getattr("__arrow_c_stream__")?.call0()?; let capsule = capsule.downcast::()?; validate_pycapsule(capsule, "arrow_array_stream")?; From 53b3b6b13ffdad2c8e19e2c5de4ffc05ffcf2fc9 Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Wed, 3 Apr 2024 07:45:49 +1100 Subject: [PATCH 7/9] Use PyBackedStr when extracting --- arrow/src/pyarrow.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index c3180e8794b9..1733067c738a 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -64,6 +64,7 @@ use pyo3::exceptions::{PyTypeError, PyValueError}; use pyo3::ffi::Py_uintptr_t; use pyo3::import_exception; use pyo3::prelude::*; +use pyo3::pybacked::PyBackedStr; use pyo3::types::{PyCapsule, PyList, PyTuple}; use crate::array::{make_array, ArrayData}; @@ -110,15 +111,13 @@ fn validate_class(expected: &str, value: &Bound) -> PyResult<()> { let pyarrow = PyModule::import_bound(value.py(), "pyarrow")?; let class = pyarrow.getattr(expected)?; if !value.is_instance(&class)? { - let expected_module = class.getattr("__module__")?; - let expected_module = expected_module.extract::<&str>()?; - let expected_name = class.getattr("__name__")?; - let expected_name = expected_name.extract::<&str>()?; + let expected_module = class.getattr("__module__")?.extract::()?; + let expected_name = class.getattr("__name__")?.extract::()?; let found_class = value.get_type(); - let found_module = found_class.getattr("__module__")?; - let found_module = found_module.extract::<&str>()?; - let found_name = found_class.getattr("__name__")?; - let found_name = found_name.extract::<&str>()?; + let found_module = found_class + .getattr("__module__")? + .extract::()?; + let found_name = found_class.getattr("__name__")?.extract::()?; return Err(PyTypeError::new_err(format!( "Expected instance of {}.{}, got {}.{}", expected_module, expected_name, found_module, found_name From 8a7c9bee14773d8030703c070d668a04462b2f32 Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Wed, 3 Apr 2024 07:47:12 +1100 Subject: [PATCH 8/9] Bump to pyo3 0.21.1 --- arrow-pyarrow-integration-testing/Cargo.toml | 2 +- arrow/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow-pyarrow-integration-testing/Cargo.toml b/arrow-pyarrow-integration-testing/Cargo.toml index 129d0eca3907..6f07d42d88c1 100644 --- a/arrow-pyarrow-integration-testing/Cargo.toml +++ b/arrow-pyarrow-integration-testing/Cargo.toml @@ -34,4 +34,4 @@ crate-type = ["cdylib"] [dependencies] arrow = { path = "../arrow", features = ["pyarrow"] } -pyo3 = { version = "0.21", features = ["extension-module"] } +pyo3 = { version = "0.21.1", features = ["extension-module"] } diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 82529fb4ac3d..a938d75b1a6f 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -54,7 +54,7 @@ arrow-select = { workspace = true } arrow-string = { workspace = true } rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true } -pyo3 = { version = "0.21", default-features = false, optional = true } +pyo3 = { version = "0.21.1", default-features = false, optional = true } [package.metadata.docs.rs] features = ["prettyprint", "ipc_compression", "ffi", "pyarrow"] From 395875919107e30c9a7d82932dcfb3da3bd2aa61 Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Fri, 5 Apr 2024 07:58:54 +1100 Subject: [PATCH 9/9] Trigger