Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update pyo3 to 0.21 #5579

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion arrow-pyarrow-integration-testing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@ crate-type = ["cdylib"]

[dependencies]
arrow = { path = "../arrow", features = ["pyarrow"] }
pyo3 = { version = "0.20", features = ["extension-module"] }
pyo3 = { version = "0.21", features = ["extension-module"] }
23 changes: 8 additions & 15 deletions arrow-pyarrow-integration-testing/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ fn to_py_err(err: ArrowError) -> PyErr {

/// Returns `array + array` of an int64 array.
#[pyfunction]
fn double(array: &PyAny, py: Python) -> PyResult<PyObject> {
fn double(array: &Bound<PyAny>, py: Python) -> PyResult<PyObject> {
// import
let array = make_array(ArrayData::from_pyarrow(array)?);

Expand All @@ -60,15 +60,15 @@ fn double(array: &PyAny, py: Python) -> PyResult<PyObject> {
/// calls a lambda function that receives and returns an array
/// whose result must be the array multiplied by two
#[pyfunction]
fn double_py(lambda: &PyAny, py: Python) -> PyResult<bool> {
fn double_py(lambda: &Bound<PyAny>, py: Python) -> PyResult<bool> {
// create
let array = Arc::new(Int64Array::from(vec![Some(1), None, Some(3)]));
let expected = Arc::new(Int64Array::from(vec![Some(2), None, Some(6)])) as ArrayRef;

// to py
let pyarray = array.to_data().to_pyarrow(py)?;
let pyarray = lambda.call1((pyarray,))?;
let array = make_array(ArrayData::from_pyarrow(pyarray)?);
let array = make_array(ArrayData::from_pyarrow(&pyarray)?);

Ok(array == expected)
}
Expand All @@ -82,16 +82,12 @@ fn make_empty_array(datatype: PyArrowType<DataType>, py: Python) -> PyResult<PyO

/// Returns the substring
#[pyfunction]
fn substring(
array: PyArrowType<ArrayData>,
start: i64,
) -> PyResult<PyArrowType<ArrayData>> {
fn substring(array: PyArrowType<ArrayData>, start: i64) -> PyResult<PyArrowType<ArrayData>> {
// import
let array = make_array(array.0);

// substring
let array =
kernels::substring::substring(array.as_ref(), start, None).map_err(to_py_err)?;
let array = kernels::substring::substring(array.as_ref(), start, None).map_err(to_py_err)?;

Ok(array.to_data().into())
}
Expand All @@ -102,8 +98,7 @@ fn concatenate(array: PyArrowType<ArrayData>, py: Python) -> PyResult<PyObject>
let array = make_array(array.0);

// concat
let array =
kernels::concat::concat(&[array.as_ref(), array.as_ref()]).map_err(to_py_err)?;
let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).map_err(to_py_err)?;

array.to_data().to_pyarrow(py)
}
Expand All @@ -129,9 +124,7 @@ fn round_trip_array(obj: PyArrowType<ArrayData>) -> PyResult<PyArrowType<ArrayDa
}

#[pyfunction]
fn round_trip_record_batch(
obj: PyArrowType<RecordBatch>,
) -> PyResult<PyArrowType<RecordBatch>> {
fn round_trip_record_batch(obj: PyArrowType<RecordBatch>) -> PyResult<PyArrowType<RecordBatch>> {
Ok(obj)
}

Expand Down Expand Up @@ -168,7 +161,7 @@ fn boxed_reader_roundtrip(
}

#[pymodule]
fn arrow_pyarrow_integration_testing(_py: Python, m: &PyModule) -> PyResult<()> {
fn arrow_pyarrow_integration_testing(_py: Python, m: &Bound<PyModule>) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(double))?;
m.add_wrapped(wrap_pyfunction!(double_py))?;
m.add_wrapped(wrap_pyfunction!(make_empty_array))?;
Expand Down
2 changes: 1 addition & 1 deletion arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ arrow-select = { workspace = true }
arrow-string = { workspace = true }

rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true }
pyo3 = { version = "0.20", default-features = false, optional = true }
pyo3 = { version = "0.21", default-features = false, optional = true }

[package.metadata.docs.rs]
features = ["prettyprint", "ipc_compression", "ffi", "pyarrow"]
Expand Down
95 changes: 51 additions & 44 deletions arrow/src/pyarrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ fn to_py_err(err: ArrowError) -> PyErr {
}

pub trait FromPyArrow: Sized {
fn from_pyarrow(value: &PyAny) -> PyResult<Self>;
fn from_pyarrow(value: &Bound<PyAny>) -> PyResult<Self>;
}

/// Create a new PyArrow object from a arrow-rs type.
Expand All @@ -101,24 +101,27 @@ impl<T: ToPyArrow> IntoPyArrow for T {
}
}

fn validate_class(expected: &str, value: &PyAny) -> PyResult<()> {
let pyarrow = PyModule::import(value.py(), "pyarrow")?;
fn validate_class(expected: &str, value: &Bound<PyAny>) -> PyResult<()> {
let pyarrow = PyModule::import_bound(value.py(), "pyarrow")?;
let class = pyarrow.getattr(expected)?;
if !value.is_instance(class)? {
let expected_module = class.getattr("__module__")?.extract::<&str>()?;
let expected_name = class.getattr("__name__")?.extract::<&str>()?;
if !value.is_instance(&class)? {
let expected_module_owner = class.getattr("__module__")?;
let expected_module: &str = expected_module_owner.extract()?;
let expected_name_owner = class.getattr("__name__")?;
let expected_name: &str = expected_name_owner.extract()?;
let found_class = value.get_type();
let found_module = found_class.getattr("__module__")?.extract::<&str>()?;
let found_name = found_class.getattr("__name__")?.extract::<&str>()?;
let found_module_owner = found_class.getattr("__module__")?;
let found_module: &str = found_module_owner.extract()?;
let found_name_owner = found_class.getattr("__name__")?;
let found_name: &str = found_name_owner.extract()?;
return Err(PyTypeError::new_err(format!(
"Expected instance of {}.{}, got {}.{}",
expected_module, expected_name, found_module, found_name
"Expected instance of {expected_module}.{expected_name}, got {found_module}.{found_name}"
)));
}
Ok(())
}

fn validate_pycapsule(capsule: &PyCapsule, name: &str) -> PyResult<()> {
fn validate_pycapsule(capsule: &Bound<PyCapsule>, name: &str) -> PyResult<()> {
let capsule_name = capsule.name()?;
if capsule_name.is_none() {
return Err(PyValueError::new_err(
Expand All @@ -129,22 +132,21 @@ fn validate_pycapsule(capsule: &PyCapsule, name: &str) -> PyResult<()> {
let capsule_name = capsule_name.unwrap().to_str()?;
if capsule_name != name {
return Err(PyValueError::new_err(format!(
"Expected name '{}' in PyCapsule, instead got '{}'",
name, capsule_name
"Expected name '{name}' in PyCapsule, instead got '{capsule_name}'",
)));
}

Ok(())
}

impl FromPyArrow for DataType {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_schema__")? {
let capsule: &PyCapsule =
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
let capsule_owner = value.getattr("__arrow_c_schema__")?.call0()?;
let capsule: &Bound<PyCapsule> = capsule_owner.downcast()?;
validate_pycapsule(capsule, "arrow_schema")?;

let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
Expand All @@ -166,21 +168,21 @@ impl ToPyArrow for DataType {
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("DataType")?;
let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?;
Ok(dtype.into())
}
}

impl FromPyArrow for Field {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_schema__")? {
let capsule: &PyCapsule =
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
let capsule_owner = value.getattr("__arrow_c_schema__")?.call0()?;
let capsule: &Bound<PyCapsule> = capsule_owner.downcast()?;
validate_pycapsule(capsule, "arrow_schema")?;

let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
Expand All @@ -202,21 +204,21 @@ impl ToPyArrow for Field {
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("Field")?;
let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?;
Ok(dtype.into())
}
}

impl FromPyArrow for Schema {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_schema__")? {
let capsule: &PyCapsule =
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
let capsule_owner = value.getattr("__arrow_c_schema__")?.call0()?;
let capsule: &Bound<PyCapsule> = capsule_owner.downcast()?;
validate_pycapsule(capsule, "arrow_schema")?;

let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
Expand All @@ -238,15 +240,15 @@ impl ToPyArrow for Schema {
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("Schema")?;
let schema = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?;
Ok(schema.into())
}
}

impl FromPyArrow for ArrayData {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
Expand All @@ -259,8 +261,10 @@ impl FromPyArrow for ArrayData {
));
}

let schema_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(0)?)?;
let array_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(1)?)?;
let schema_capsule_owner = tuple.get_item(0)?;
let schema_capsule: &Bound<PyCapsule> = schema_capsule_owner.downcast()?;
let array_capsule_owner = tuple.get_item(1)?;
let array_capsule: &Bound<PyCapsule> = array_capsule_owner.downcast()?;

validate_pycapsule(schema_capsule, "arrow_schema")?;
validate_pycapsule(array_capsule, "arrow_array")?;
Expand Down Expand Up @@ -296,7 +300,7 @@ impl ToPyArrow for ArrayData {
let array = FFI_ArrowArray::new(self);
let schema = FFI_ArrowSchema::try_from(self.data_type()).map_err(to_py_err)?;

let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("Array")?;
let array = class.call_method1(
"_import_from_c",
Expand All @@ -310,9 +314,9 @@ impl ToPyArrow for ArrayData {
}

impl<T: FromPyArrow> FromPyArrow for Vec<T> {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow(value: &Bound<PyAny>) -> PyResult<Self> {
let list = value.downcast::<PyList>()?;
list.iter().map(|x| T::from_pyarrow(x)).collect()
list.iter().map(|x| T::from_pyarrow(&x)).collect()
}
}

Expand All @@ -327,7 +331,7 @@ impl<T: ToPyArrow> ToPyArrow for Vec<T> {
}

impl FromPyArrow for RecordBatch {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
Expand All @@ -340,8 +344,10 @@ impl FromPyArrow for RecordBatch {
));
}

let schema_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(0)?)?;
let array_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(1)?)?;
let schema_capsule_owner = tuple.get_item(0)?;
let schema_capsule: &Bound<PyCapsule> = schema_capsule_owner.downcast()?;
let array_capsule_owner = tuple.get_item(1)?;
let array_capsule: &Bound<PyCapsule> = array_capsule_owner.downcast()?;

validate_pycapsule(schema_capsule, "arrow_schema")?;
validate_pycapsule(array_capsule, "arrow_array")?;
Expand Down Expand Up @@ -370,12 +376,13 @@ impl FromPyArrow for RecordBatch {
validate_class("RecordBatch", value)?;
// TODO(kszucs): implement the FFI conversions in arrow-rs for RecordBatches
let schema = value.getattr("schema")?;
let schema = Arc::new(Schema::from_pyarrow(schema)?);
let schema = Arc::new(Schema::from_pyarrow(&schema)?);

let arrays = value.getattr("columns")?.downcast::<PyList>()?;
let arrays_owner = value.getattr("columns")?;
let arrays = arrays_owner.downcast::<PyList>()?;
let arrays = arrays
.iter()
.map(|a| Ok(make_array(ArrayData::from_pyarrow(a)?)))
.map(|a| Ok(make_array(ArrayData::from_pyarrow(&a)?)))
.collect::<PyResult<_>>()?;

let batch = RecordBatch::try_new(schema, arrays).map_err(to_py_err)?;
Expand All @@ -395,13 +402,13 @@ impl ToPyArrow for RecordBatch {

/// Supports conversion from `pyarrow.RecordBatchReader` to [ArrowArrayStreamReader].
impl FromPyArrow for ArrowArrayStreamReader {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_stream__")? {
let capsule: &PyCapsule =
PyTryInto::try_into(value.getattr("__arrow_c_stream__")?.call0()?)?;
let capsule_owner = value.getattr("__arrow_c_stream__")?.call0()?;
let capsule: &Bound<PyCapsule> = capsule_owner.downcast()?;
validate_pycapsule(capsule, "arrow_array_stream")?;

let stream = unsafe { FFI_ArrowArrayStream::from_raw(capsule.pointer() as _) };
Expand All @@ -421,7 +428,7 @@ impl FromPyArrow for ArrowArrayStreamReader {
// make the conversion through PyArrow's private API
// this changes the pointer's memory and is thus unsafe.
// In particular, `_export_to_c` can go out of bounds
let args = PyTuple::new(value.py(), [stream_ptr as Py_uintptr_t]);
let args = PyTuple::new_bound(value.py(), [stream_ptr as Py_uintptr_t]);
value.call_method1("_export_to_c", args)?;

let stream_reader = ArrowArrayStreamReader::try_new(stream)
Expand All @@ -439,9 +446,9 @@ impl IntoPyArrow for Box<dyn RecordBatchReader + Send> {
let mut stream = FFI_ArrowArrayStream::new(self);

let stream_ptr = (&mut stream) as *mut FFI_ArrowArrayStream;
let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("RecordBatchReader")?;
let args = PyTuple::new(py, [stream_ptr as Py_uintptr_t]);
let args = PyTuple::new_bound(py, [stream_ptr as Py_uintptr_t]);
let reader = class.call_method1("_import_from_c", args)?;

Ok(PyObject::from(reader))
Expand All @@ -463,7 +470,7 @@ impl IntoPyArrow for ArrowArrayStreamReader {
pub struct PyArrowType<T>(pub T);

impl<'source, T: FromPyArrow> FromPyObject<'source> for PyArrowType<T> {
fn extract(value: &'source PyAny) -> PyResult<Self> {
fn extract_bound(value: &Bound<'source, PyAny>) -> PyResult<Self> {
Ok(Self(T::from_pyarrow(value)?))
}
}
Expand Down
4 changes: 2 additions & 2 deletions arrow/tests/pyarrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ fn test_to_pyarrow() {

let res = Python::with_gil(|py| {
let py_input = input.to_pyarrow(py)?;
let records = RecordBatch::from_pyarrow(py_input.as_ref(py))?;
let records = RecordBatch::from_pyarrow(py_input.bind(py))?;
let py_records = records.to_pyarrow(py)?;
RecordBatch::from_pyarrow(py_records.as_ref(py))
RecordBatch::from_pyarrow(py_records.bind(py))
})
.unwrap();

Expand Down