diff --git a/arrow/src/array/ffi.rs b/arrow/src/array/ffi.rs index fb7771ac620e..c7bc8e9f8a74 100644 --- a/arrow/src/array/ffi.rs +++ b/arrow/src/array/ffi.rs @@ -47,6 +47,8 @@ impl TryFrom for ffi::ArrowArray { /// # Safety /// Assumes that these pointers represent valid C Data Interfaces, both in memory /// representation and lifetime via the `release` mechanism. +#[deprecated(note = "Use ArrowArray::new")] +#[allow(deprecated)] pub unsafe fn make_array_from_raw( array: *const ffi::FFI_ArrowArray, schema: *const ffi::FFI_ArrowSchema, @@ -91,30 +93,22 @@ mod tests { StructArray, UInt32Array, UInt64Array, }, datatypes::{DataType, Field}, - ffi::ArrowArray, + ffi::{ArrowArray, FFI_ArrowArray, FFI_ArrowSchema}, }; use std::convert::TryFrom; use std::sync::Arc; fn test_round_trip(expected: &ArrayData) -> Result<()> { - // create a `ArrowArray` from the data. - let d1 = ArrowArray::try_from(expected.clone())?; - - // here we export the array as 2 pointers. We would have no control over ownership if it was not for - // the release mechanism. - let (array, schema) = ArrowArray::into_raw(d1); + // here we export the array + let array = FFI_ArrowArray::new(expected); + let schema = FFI_ArrowSchema::try_from(expected.data_type())?; // simulate an external consumer by being the consumer - let d1 = unsafe { ArrowArray::try_from_raw(array, schema) }?; + let d1 = ArrowArray::new(array, schema); let result = &ArrayData::try_from(d1)?; assert_eq!(result, expected); - - unsafe { - Arc::from_raw(array); - Arc::from_raw(schema); - } Ok(()) } diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs index 1a10725df678..09348996eafa 100644 --- a/arrow/src/array/mod.rs +++ b/arrow/src/array/mod.rs @@ -34,6 +34,7 @@ pub use arrow_data::{ pub use arrow_data::transform::{Capacities, MutableArrayData}; #[cfg(feature = "ffi")] +#[allow(deprecated)] pub use self::ffi::{export_array_into_raw, make_array_from_raw}; // --------------------- Array's values comparison --------------------- diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs index 78dd1ef453cb..0f0f94c7a6b8 100644 --- a/arrow/src/ffi.rs +++ b/arrow/src/ffi.rs @@ -24,68 +24,63 @@ //! The second interface maps native Rust types to the Rust-specific implementation of Arrow such as `format` to `Datatype`, //! `Buffer`, etc. This is handled by `ArrowArray`. //! +//! +//! Export to FFI +//! //! ```rust //! # use std::sync::Arc; -//! # use arrow::array::{Int32Array, Array, ArrayData, export_array_into_raw, make_array, make_array_from_raw}; -//! # use arrow::error::{Result, ArrowError}; +//! # use arrow::array::{Int32Array, Array, ArrayData, make_array}; +//! # use arrow::error::Result; //! # use arrow::compute::kernels::arithmetic; //! # use arrow::ffi::{ArrowArray, FFI_ArrowArray, FFI_ArrowSchema}; -//! # use std::convert::TryFrom; //! # fn main() -> Result<()> { //! // create an array natively //! let array = Int32Array::from(vec![Some(1), None, Some(3)]); +//! let data = array.into_data(); //! -//! // export it -//! -//! let ffi_array = ArrowArray::try_new(array.data().clone())?; -//! let (array_ptr, schema_ptr) = ArrowArray::into_raw(ffi_array); -//! -//! // consumed and used by something else... +//! // Export it +//! let out_array = FFI_ArrowArray::new(&data); +//! let out_schema = FFI_ArrowSchema::try_from(data.data_type())?; //! //! // import it -//! let array = unsafe { make_array_from_raw(array_ptr, schema_ptr)? }; +//! let array = ArrowArray::new(out_array, out_schema); +//! let array = Int32Array::from(ArrayData::try_from(array)?); //! //! // perform some operation -//! let array = array.as_any().downcast_ref::().ok_or( -//! ArrowError::ParseError("Expects an int32".to_string()), -//! )?; //! let array = arithmetic::add(&array, &array)?; //! //! // verify //! assert_eq!(array, Int32Array::from(vec![Some(2), None, Some(6)])); +//! # +//! # Ok(()) +//! # } +//! ``` //! -//! // Simulate if raw pointers are provided by consumer -//! let array = make_array(Int32Array::from(vec![Some(1), None, Some(3)]).into_data()); -//! -//! let out_array = Box::new(FFI_ArrowArray::empty()); -//! let out_schema = Box::new(FFI_ArrowSchema::empty()); -//! let out_array_ptr = Box::into_raw(out_array); -//! let out_schema_ptr = Box::into_raw(out_schema); -//! -//! // export array into raw pointers from consumer -//! unsafe { export_array_into_raw(array, out_array_ptr, out_schema_ptr)?; }; -//! -//! // import it -//! let array = unsafe { make_array_from_raw(out_array_ptr, out_schema_ptr)? }; +//! Import from FFI //! -//! // perform some operation -//! let array = array.as_any().downcast_ref::().ok_or( -//! ArrowError::ParseError("Expects an int32".to_string()), -//! )?; -//! let array = arithmetic::add(&array, &array)?; -//! -//! // verify -//! assert_eq!(array, Int32Array::from(vec![Some(2), None, Some(6)])); +//! ``` +//! # use std::ptr::addr_of_mut; +//! # use arrow::ffi::{ArrowArray, FFI_ArrowArray, FFI_ArrowSchema}; +//! # use arrow_array::{ArrayRef, make_array}; +//! # use arrow_schema::ArrowError; +//! # +//! /// A foreign data container that can export to C Data interface +//! struct ForeignArray {}; //! -//! // (drop/release) -//! unsafe { -//! Box::from_raw(out_array_ptr); -//! Box::from_raw(out_schema_ptr); -//! Arc::from_raw(array_ptr); -//! Arc::from_raw(schema_ptr); +//! impl ForeignArray { +//! /// Export from foreign array representation to C Data interface +//! /// e.g. +//! fn export_to_c(&self, array: *mut FFI_ArrowArray, schema: *mut FFI_ArrowSchema) { +//! // ... +//! } //! } //! -//! Ok(()) +//! /// Import an [`ArrayRef`] from a [`ForeignArray`] +//! fn import_array(foreign: &ForeignArray) -> Result { +//! let mut schema = FFI_ArrowSchema::empty(); +//! let mut array = FFI_ArrowArray::empty(); +//! foreign.export_to_c(addr_of_mut!(array), addr_of_mut!(schema)); +//! Ok(make_array(ArrowArray::new(array, schema).try_into()?)) //! } //! ``` @@ -139,7 +134,15 @@ bitflags! { /// ABI-compatible struct for `ArrowSchema` from C Data Interface /// See -/// This was created by bindgen +/// +/// ``` +/// # use arrow::ffi::FFI_ArrowSchema; +/// # use arrow_data::ArrayData; +/// fn array_schema(data: &ArrayData) -> FFI_ArrowSchema { +/// FFI_ArrowSchema::try_from(data.data_type()).unwrap() +/// } +/// ``` +/// #[repr(C)] #[derive(Debug)] pub struct FFI_ArrowSchema { @@ -394,7 +397,14 @@ fn bit_width(data_type: &DataType, i: usize) -> Result { /// ABI-compatible struct for ArrowArray from C Data Interface /// See -/// This was created by bindgen +/// +/// ``` +/// # use arrow::ffi::FFI_ArrowArray; +/// # use arrow_array::Array; +/// fn export_array(array: &dyn Array) -> FFI_ArrowArray { +/// FFI_ArrowArray::new(array.data()) +/// } +/// ``` #[repr(C)] #[derive(Debug)] pub struct FFI_ArrowArray { @@ -859,6 +869,14 @@ impl<'a> ArrowArrayRef for ArrowArrayChild<'a> { } impl ArrowArray { + /// Creates a new [`ArrowArray`] from the provided array and schema + pub fn new(array: FFI_ArrowArray, schema: FFI_ArrowSchema) -> Self { + Self { + array: Arc::new(array), + schema: Arc::new(schema), + } + } + /// creates a new `ArrowArray`. This is used to export to the C Data Interface. /// /// # Memory Leaks @@ -878,6 +896,7 @@ impl ArrowArray { /// on managing the allocation of the structs by themselves. /// # Error /// Errors if any of the pointers is null + #[deprecated(note = "Use ArrowArray::new")] pub unsafe fn try_from_raw( array: *const FFI_ArrowArray, schema: *const FFI_ArrowSchema, @@ -911,6 +930,7 @@ impl ArrowArray { } /// exports [ArrowArray] to the C Data Interface + #[deprecated(note = "Use FFI_ArrowArray and FFI_ArrowSchema directly")] pub fn into_raw(this: ArrowArray) -> (*const FFI_ArrowArray, *const FFI_ArrowSchema) { (Arc::into_raw(this.array), Arc::into_raw(this.schema)) } @@ -946,28 +966,49 @@ mod tests { use arrow_array::types::{Float64Type, Int32Type}; use arrow_array::{Float64Array, UnionArray}; use std::convert::TryFrom; + use std::mem::ManuallyDrop; + use std::ptr::addr_of_mut; #[test] - fn test_round_trip() -> Result<()> { + fn test_round_trip() { // create an array natively let array = Int32Array::from(vec![1, 2, 3]); // export it - let array = ArrowArray::try_from(array.into_data())?; + let array = ArrowArray::try_from(array.into_data()).unwrap(); // (simulate consumer) import it - let data = ArrayData::try_from(array)?; - let array = make_array(data); - - // perform some operation - let array = array.as_any().downcast_ref::().unwrap(); - let array = kernels::arithmetic::add(array, array).unwrap(); + let array = Int32Array::from(ArrayData::try_from(array).unwrap()); + let array = kernels::arithmetic::add(&array, &array).unwrap(); // verify assert_eq!(array, Int32Array::from(vec![2, 4, 6])); + } - // (drop/release) - Ok(()) + #[test] + fn test_import() { + // Model receiving const pointers from an external system + + // Create an array natively + let data = Int32Array::from(vec![1, 2, 3]).into_data(); + let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap(); + let array = FFI_ArrowArray::new(&data); + + // Use ManuallyDrop to avoid Box:Drop recursing + let schema = Box::new(ManuallyDrop::new(schema)); + let array = Box::new(ManuallyDrop::new(array)); + + let schema_ptr = &**schema as *const _; + let array_ptr = &**array as *const _; + + // We can read them back to memory + // SAFETY: + // Pointers are aligned and valid + let array = + unsafe { ArrowArray::new(ptr::read(array_ptr), ptr::read(schema_ptr)) }; + + let array = Int32Array::from(ArrayData::try_from(array).unwrap()); + assert_eq!(array, Int32Array::from(vec![1, 2, 3])); } #[test] @@ -1424,31 +1465,28 @@ mod tests { let array = make_array(Int32Array::from(vec![1, 2, 3]).into_data()); // Assume two raw pointers provided by the consumer - let out_array = Box::new(FFI_ArrowArray::empty()); - let out_schema = Box::new(FFI_ArrowSchema::empty()); - let out_array_ptr = Box::into_raw(out_array); - let out_schema_ptr = Box::into_raw(out_schema); - - unsafe { - export_array_into_raw(array, out_array_ptr, out_schema_ptr)?; + let mut out_array = FFI_ArrowArray::empty(); + let mut out_schema = FFI_ArrowSchema::empty(); + + { + let out_array_ptr = addr_of_mut!(out_array); + let out_schema_ptr = addr_of_mut!(out_schema); + unsafe { + export_array_into_raw(array, out_array_ptr, out_schema_ptr)?; + } } // (simulate consumer) import it - unsafe { - let array = ArrowArray::try_from_raw(out_array_ptr, out_schema_ptr).unwrap(); - let data = ArrayData::try_from(array)?; - let array = make_array(data); - - // perform some operation - let array = array.as_any().downcast_ref::().unwrap(); - let array = kernels::arithmetic::add(array, array).unwrap(); + let array = ArrowArray::new(out_array, out_schema); + let data = ArrayData::try_from(array)?; + let array = make_array(data); - // verify - assert_eq!(array, Int32Array::from(vec![2, 4, 6])); + // perform some operation + let array = array.as_any().downcast_ref::().unwrap(); + let array = kernels::arithmetic::add(array, array).unwrap(); - drop(Box::from_raw(out_array_ptr)); - drop(Box::from_raw(out_schema_ptr)); - } + // verify + assert_eq!(array, Int32Array::from(vec![2, 4, 6])); Ok(()) } diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 09933304ecf9..110fd9cfaa82 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -19,6 +19,7 @@ //! arrays from and to Python. use std::convert::{From, TryFrom}; +use std::ptr::addr_of_mut; use std::sync::Arc; use pyo3::ffi::Py_uintptr_t; @@ -30,7 +31,7 @@ use crate::array::{make_array, Array, ArrayData}; use crate::datatypes::{DataType, Field, Schema}; use crate::error::ArrowError; use crate::ffi; -use crate::ffi::FFI_ArrowSchema; +use crate::ffi::{FFI_ArrowArray, FFI_ArrowSchema}; use crate::ffi_stream::{ export_reader_into_raw, ArrowArrayStreamReader, FFI_ArrowArrayStream, }; @@ -111,8 +112,8 @@ impl PyArrowConvert for Schema { impl PyArrowConvert for ArrayData { fn from_pyarrow(value: &PyAny) -> PyResult { // prepare a pointer to receive the Array struct - let (array_pointer, schema_pointer) = - ffi::ArrowArray::into_raw(unsafe { ffi::ArrowArray::empty() }); + let mut array = FFI_ArrowArray::empty(); + let mut schema = FFI_ArrowSchema::empty(); // make the conversion through PyArrow's private API // this changes the pointer's memory and is thus unsafe. @@ -120,15 +121,12 @@ impl PyArrowConvert for ArrayData { value.call_method1( "_export_to_c", ( - array_pointer as Py_uintptr_t, - schema_pointer as Py_uintptr_t, + addr_of_mut!(array) as Py_uintptr_t, + addr_of_mut!(schema) as Py_uintptr_t, ), )?; - let ffi_array = unsafe { - ffi::ArrowArray::try_from_raw(array_pointer, schema_pointer) - .map_err(to_py_err)? - }; + let ffi_array = ffi::ArrowArray::new(array, schema); let data = ArrayData::try_from(ffi_array).map_err(to_py_err)?; Ok(data)