Skip to content

Commit

Permalink
Fix pyarrow memory leak (apache#3683) (apache#3893)
Browse files Browse the repository at this point in the history
* Fix pyarrow memory leak (apache#3683)

Remove ArrowArray::into_raw and try_from_raw

* Update docs

* Further deprecation

* Clippy
  • Loading branch information
tustvold authored Mar 21, 2023
1 parent 90cb00d commit 3a6f8bd
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 78 deletions.
3 changes: 0 additions & 3 deletions arrow-data/src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,6 @@ struct ArrayPrivateData {

impl FFI_ArrowArray {
/// creates a new `FFI_ArrowArray` from existing data.
/// # Memory Leaks
/// This method releases `buffers`. Consumers of this struct *must* call `release` before
/// releasing this struct, or contents in `buffers` leak.
pub fn new(data: &ArrayData) -> Self {
let data_layout = layout(data.data_type());

Expand Down
18 changes: 2 additions & 16 deletions arrow/src/array/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::{
ffi::ArrowArrayRef,
};

use super::{make_array, ArrayData, ArrayRef};
use super::{ArrayData, ArrayRef};

impl TryFrom<ffi::ArrowArray> for ArrayData {
type Error = ArrowError;
Expand All @@ -43,21 +43,6 @@ impl TryFrom<ArrayData> for ffi::ArrowArray {
}
}

/// Creates a new array from two FFI pointers. Used to import arrays from the C Data Interface
/// # Safety
/// Assumes that these pointers represent valid C Data Interfaces, both in memory
/// representation and lifetime via the `release` mechanism.
#[deprecated(note = "Use ArrowArray::new")]
#[allow(deprecated)]
pub unsafe fn make_array_from_raw(
array: *const ffi::FFI_ArrowArray,
schema: *const ffi::FFI_ArrowSchema,
) -> Result<ArrayRef> {
let array = ffi::ArrowArray::try_from_raw(array, schema)?;
let data = ArrayData::try_from(array)?;
Ok(make_array(data))
}

/// Exports an array to raw pointers of the C Data Interface provided by the consumer.
/// # Safety
/// Assumes that these pointers represent valid C Data Interfaces, both in memory
Expand All @@ -66,6 +51,7 @@ pub unsafe fn make_array_from_raw(
/// This function copies the content of two FFI structs [ffi::FFI_ArrowArray] and
/// [ffi::FFI_ArrowSchema] in the array to the location pointed by the raw pointers.
/// Usually the raw pointers are provided by the array data consumer.
#[deprecated(note = "Use FFI_ArrowArray::new and FFI_ArrowSchema::try_from")]
pub unsafe fn export_array_into_raw(
src: ArrayRef,
out_array: *mut ffi::FFI_ArrowArray,
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pub use arrow_data::transform::{Capacities, MutableArrayData};

#[cfg(feature = "ffi")]
#[allow(deprecated)]
pub use self::ffi::{export_array_into_raw, make_array_from_raw};
pub use self::ffi::export_array_into_raw;

// --------------------- Array's values comparison ---------------------

Expand Down
63 changes: 10 additions & 53 deletions arrow/src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -394,21 +394,15 @@ pub trait ArrowArrayRef {
/// Its main responsibility is to expose functionality that requires
/// both [FFI_ArrowArray] and [FFI_ArrowSchema].
///
/// This struct has two main paths:
///
/// ## Import from the C Data Interface
/// * [ArrowArray::empty] to allocate memory to be filled by an external call
/// * [ArrowArray::try_from_raw] to consume two non-null allocated pointers
/// * [ArrowArray::new] to create an array from [`FFI_ArrowArray`] and [`FFI_ArrowSchema`]
///
/// ## Export to the C Data Interface
/// * [ArrowArray::try_new] to create a new [ArrowArray] from Rust-specific information
/// * [ArrowArray::into_raw] to expose two pointers for [FFI_ArrowArray] and [FFI_ArrowSchema].
/// * Use [`FFI_ArrowArray`] and [`FFI_ArrowSchema`] directly
///
/// # Safety
/// Whoever creates this struct is responsible for releasing their resources. Specifically,
/// consumers *must* call [ArrowArray::into_raw] and take ownership of the individual pointers,
/// calling [FFI_ArrowArray::release] and [FFI_ArrowSchema::release] accordingly.
///
/// Furthermore, this struct assumes that the incoming data agrees with the C data interface.
/// This struct assumes that the incoming data agrees with the C data interface.
#[derive(Debug)]
pub struct ArrowArray {
pub(crate) array: Arc<FFI_ArrowArray>,
Expand Down Expand Up @@ -480,38 +474,6 @@ impl ArrowArray {
Ok(ArrowArray { array, schema })
}

/// creates a new [ArrowArray] from two pointers. Used to import from the C Data Interface.
/// # Safety
/// See safety of [ArrowArray]
/// Note that this function will copy the content pointed by the raw pointers. Considering
/// the raw pointers can be from `Arc::into_raw` or other raw pointers, users must be responsible
/// on managing the allocation of the structs by themselves.
/// # Error
/// Errors if any of the pointers is null
#[deprecated(note = "Use ArrowArray::new")]
pub unsafe fn try_from_raw(
array: *const FFI_ArrowArray,
schema: *const FFI_ArrowSchema,
) -> Result<Self> {
if array.is_null() || schema.is_null() {
return Err(ArrowError::MemoryError(
"At least one of the pointers passed to `try_from_raw` is null"
.to_string(),
));
};

let array_mut = array as *mut FFI_ArrowArray;
let schema_mut = schema as *mut FFI_ArrowSchema;

let array_data = std::ptr::replace(array_mut, FFI_ArrowArray::empty());
let schema_data = std::ptr::replace(schema_mut, FFI_ArrowSchema::empty());

Ok(Self {
array: Arc::new(array_data),
schema: Arc::new(schema_data),
})
}

/// creates a new empty [ArrowArray]. Used to import from the C Data Interface.
/// # Safety
/// See safety of [ArrowArray]
Expand All @@ -520,23 +482,16 @@ impl ArrowArray {
let array = Arc::new(FFI_ArrowArray::empty());
ArrowArray { array, schema }
}

/// exports [ArrowArray] to the C Data Interface
#[deprecated(note = "Use FFI_ArrowArray and FFI_ArrowSchema directly")]
pub fn into_raw(this: ArrowArray) -> (*const FFI_ArrowArray, *const FFI_ArrowSchema) {
(Arc::into_raw(this.array), Arc::into_raw(this.schema))
}
}

#[cfg(test)]
mod tests {
use super::*;
use crate::array::{
export_array_into_raw, make_array, Array, ArrayData, BooleanArray,
Decimal128Array, DictionaryArray, DurationSecondArray, FixedSizeBinaryArray,
FixedSizeListArray, GenericBinaryArray, GenericListArray, GenericStringArray,
Int32Array, MapArray, OffsetSizeTrait, Time32MillisecondArray,
TimestampMillisecondArray, UInt32Array,
make_array, Array, ArrayData, BooleanArray, Decimal128Array, DictionaryArray,
DurationSecondArray, FixedSizeBinaryArray, FixedSizeListArray,
GenericBinaryArray, GenericListArray, GenericStringArray, Int32Array, MapArray,
OffsetSizeTrait, Time32MillisecondArray, TimestampMillisecondArray, UInt32Array,
};
use crate::compute::kernels;
use crate::datatypes::{Field, Int8Type};
Expand Down Expand Up @@ -1040,7 +995,9 @@ mod tests {
}

#[test]
#[allow(deprecated)]
fn test_export_array_into_raw() -> Result<()> {
use crate::array::export_array_into_raw;
let array = make_array(Int32Array::from(vec![1, 2, 3]).into_data());

// Assume two raw pointers provided by the consumer
Expand Down
10 changes: 5 additions & 5 deletions arrow/src/pyarrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
//! arrays from and to Python.
use std::convert::{From, TryFrom};
use std::ptr::addr_of_mut;
use std::ptr::{addr_of, addr_of_mut};
use std::sync::Arc;

use pyo3::ffi::Py_uintptr_t;
Expand Down Expand Up @@ -133,16 +133,16 @@ impl PyArrowConvert for ArrayData {
}

fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
let array = ffi::ArrowArray::try_from(self.clone()).map_err(to_py_err)?;
let (array_pointer, schema_pointer) = ffi::ArrowArray::into_raw(array);
let array = FFI_ArrowArray::new(self);
let schema = FFI_ArrowSchema::try_from(self.data_type()).map_err(to_py_err)?;

let module = py.import("pyarrow")?;
let class = module.getattr("Array")?;
let array = class.call_method1(
"_import_from_c",
(
array_pointer as Py_uintptr_t,
schema_pointer as Py_uintptr_t,
addr_of!(array) as Py_uintptr_t,
addr_of!(schema) as Py_uintptr_t,
),
)?;
Ok(array.to_object(py))
Expand Down

0 comments on commit 3a6f8bd

Please sign in to comment.