Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Added support for ffi for FixedSizeList and FixedSizeBinary #565

Merged
merged 1 commit into from
Nov 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions arrow-pyarrow-integration-testing/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,14 @@ def test_string_sliced(self):
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

def test_fixed_binary(self):
a = pyarrow.array([b"aa", None, b"cc"], pyarrow.binary(2))
b = arrow_pyarrow_integration_testing.round_trip_array(a)

b.validate(full=True)
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

def test_decimal_roundtrip(self):
"""
Python -> Rust -> Python
Expand Down Expand Up @@ -179,6 +187,35 @@ def test_list_list_array(self):
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

def test_fixed_list(self):
"""
Python -> Rust -> Python
"""
a = pyarrow.array(
[None, [1, 2], [4, 5]],
pyarrow.list_(pyarrow.int64(), 2),
)
b = arrow_pyarrow_integration_testing.round_trip_array(a)

b.validate(full=True)
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

# same as https://issues.apache.org/jira/browse/ARROW-14383
def _test_fixed_list_sliced(self):
"""
Python -> Rust -> Python
"""
a = pyarrow.array(
[None, [1, 2], [4, 5]],
pyarrow.list_(pyarrow.int64(), 2),
).slice(1, 2)
b = arrow_pyarrow_integration_testing.round_trip_array(a)

b.validate(full=True)
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

def test_dict(self):
"""
Python -> Rust -> Python
Expand Down
4 changes: 2 additions & 2 deletions src/array/binary/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ impl<O: Offset, A: ffi::ArrowArrayRef> FromFfi<A> for BinaryArray<O> {
let data_type = array.field().data_type().clone();

let validity = unsafe { array.validity() }?;
let offsets = unsafe { array.buffer::<O>(0) }?;
let values = unsafe { array.buffer::<u8>(1) }?;
let offsets = unsafe { array.buffer::<O>(1) }?;
let values = unsafe { array.buffer::<u8>(2) }?;

Ok(Self::from_data_unchecked(
data_type, offsets, values, validity,
Expand Down
2 changes: 1 addition & 1 deletion src/array/boolean/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ impl<A: ffi::ArrowArrayRef> FromFfi<A> for BooleanArray {
unsafe fn try_from_ffi(array: A) -> Result<Self> {
let data_type = array.field().data_type().clone();
let validity = unsafe { array.validity() }?;
let values = unsafe { array.bitmap(0) }?;
let values = unsafe { array.bitmap(1) }?;
Ok(Self::from_data(data_type, values, validity))
}
}
2 changes: 1 addition & 1 deletion src/array/dictionary/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ impl<K: DictionaryKey, A: ffi::ArrowArrayRef> FromFfi<A> for DictionaryArray<K>
unsafe fn try_from_ffi(array: A) -> Result<Self> {
// keys: similar to PrimitiveArray, but the datatype is the inner one
let validity = unsafe { array.validity() }?;
let values = unsafe { array.buffer::<K>(0) }?;
let values = unsafe { array.buffer::<K>(1) }?;

let keys = PrimitiveArray::<K>::from_data(K::DATA_TYPE, values, validity);
let values = array.dictionary()?.unwrap();
Expand Down
2 changes: 1 addition & 1 deletion src/array/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ pub(crate) unsafe trait ToFfi {

/// Trait describing how a struct imports into itself from the
/// [C data interface](https://arrow.apache.org/docs/format/CDataInterface.html) (FFI).
pub trait FromFfi<T: ffi::ArrowArrayRef>: Sized {
pub(crate) trait FromFfi<T: ffi::ArrowArrayRef>: Sized {
/// Convert itself from FFI.
/// # Safety
/// This function is intrinsically `unsafe` as it requires the FFI to be made according
Expand Down
17 changes: 16 additions & 1 deletion src/array/fixed_size_binary/ffi.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
use crate::{array::ToFfi, bitmap::align};
use crate::{
array::{FromFfi, ToFfi},
bitmap::align,
error::Result,
ffi,
};

use super::FixedSizeBinaryArray;

Expand Down Expand Up @@ -42,3 +47,13 @@ unsafe impl ToFfi for FixedSizeBinaryArray {
}
}
}

impl<A: ffi::ArrowArrayRef> FromFfi<A> for FixedSizeBinaryArray {
unsafe fn try_from_ffi(array: A) -> Result<Self> {
let data_type = array.field().data_type().clone();
let validity = unsafe { array.validity() }?;
let values = unsafe { array.buffer::<u8>(1) }?;

Ok(Self::from_data(data_type, values, validity))
}
}
20 changes: 19 additions & 1 deletion src/array/fixed_size_list/ffi.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
use std::sync::Arc;

use super::super::{ffi::ToFfi, Array};
use super::FixedSizeListArray;
use crate::{
array::{
ffi::{FromFfi, ToFfi},
Array,
},
error::Result,
ffi,
};

unsafe impl ToFfi for FixedSizeListArray {
fn buffers(&self) -> Vec<Option<std::ptr::NonNull<u8>>> {
Expand All @@ -25,3 +32,14 @@ unsafe impl ToFfi for FixedSizeListArray {
self.clone()
}
}

impl<A: ffi::ArrowArrayRef> FromFfi<A> for FixedSizeListArray {
unsafe fn try_from_ffi(array: A) -> Result<Self> {
let data_type = array.field().data_type().clone();
let validity = unsafe { array.validity() }?;
let child = unsafe { array.child(0)? };
let values = ffi::try_from(child)?.into();

Ok(Self::from_data(data_type, values, validity))
}
}
2 changes: 1 addition & 1 deletion src/array/list/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ impl<O: Offset, A: ffi::ArrowArrayRef> FromFfi<A> for ListArray<O> {
unsafe fn try_from_ffi(array: A) -> Result<Self> {
let data_type = array.field().data_type().clone();
let validity = unsafe { array.validity() }?;
let offsets = unsafe { array.buffer::<O>(0) }?;
let offsets = unsafe { array.buffer::<O>(1) }?;
let child = unsafe { array.child(0)? };
let values = ffi::try_from(child)?.into();

Expand Down
18 changes: 9 additions & 9 deletions src/array/list/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,6 @@ impl<O: Offset, M: MutableArray + Default> MutableListArray<O, M> {
validity: None,
}
}

/// Shrinks the capacity of the [`MutableListArray`] to fit its current length.
pub fn shrink_to_fit(&mut self) {
self.values.shrink_to_fit();
self.offsets.shrink_to_fit();
if let Some(validity) = &mut self.validity {
validity.shrink_to_fit()
}
}
}

impl<O: Offset, M: MutableArray + Default> Default for MutableListArray<O, M> {
Expand Down Expand Up @@ -188,6 +179,15 @@ impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
let a: ListArray<O> = self.into();
Arc::new(a)
}

/// Shrinks the capacity of the [`MutableListArray`] to fit its current length.
pub fn shrink_to_fit(&mut self) {
self.values.shrink_to_fit();
self.offsets.shrink_to_fit();
if let Some(validity) = &mut self.validity {
validity.shrink_to_fit()
}
}
}

impl<O: Offset, M: MutableArray + Default + 'static> MutableArray for MutableListArray<O, M> {
Expand Down
2 changes: 1 addition & 1 deletion src/array/map/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ impl<A: ffi::ArrowArrayRef> FromFfi<A> for MapArray {
unsafe fn try_from_ffi(array: A) -> Result<Self> {
let data_type = array.field().data_type().clone();
let validity = unsafe { array.validity() }?;
let offsets = unsafe { array.buffer::<i32>(0) }?;
let offsets = unsafe { array.buffer::<i32>(1) }?;
let child = array.child(0)?;
let values = ffi::try_from(child)?.into();

Expand Down
2 changes: 1 addition & 1 deletion src/array/primitive/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ impl<T: NativeType, A: ffi::ArrowArrayRef> FromFfi<A> for PrimitiveArray<T> {
unsafe fn try_from_ffi(array: A) -> Result<Self> {
let data_type = array.field().data_type().clone();
let validity = unsafe { array.validity() }?;
let values = unsafe { array.buffer::<T>(0) }?;
let values = unsafe { array.buffer::<T>(1) }?;

Ok(Self::from_data(data_type, values, validity))
}
Expand Down
4 changes: 2 additions & 2 deletions src/array/union/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ impl<A: ffi::ArrowArrayRef> FromFfi<A> for UnionArray {
let data_type = field.data_type().clone();
let fields = Self::get_fields(field.data_type());

let mut types = unsafe { array.buffer::<i8>(0) }?;
let mut types = unsafe { array.buffer::<i8>(1) }?;
let offsets = if Self::is_sparse(&data_type) {
None
} else {
Some(unsafe { array.buffer::<i32>(1) }?)
Some(unsafe { array.buffer::<i32>(2) }?)
};

let length = array.array().len();
Expand Down
4 changes: 2 additions & 2 deletions src/array/utf8/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ impl<O: Offset, A: ffi::ArrowArrayRef> FromFfi<A> for Utf8Array<O> {
unsafe fn try_from_ffi(array: A) -> Result<Self> {
let data_type = array.field().data_type().clone();
let validity = unsafe { array.validity() }?;
let offsets = unsafe { array.buffer::<O>(0) }?;
let values = unsafe { array.buffer::<u8>(1)? };
let offsets = unsafe { array.buffer::<O>(1) }?;
let values = unsafe { array.buffer::<u8>(2)? };

Ok(Self::from_data_unchecked(
data_type, offsets, values, validity,
Expand Down
2 changes: 2 additions & 0 deletions src/ffi/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ pub unsafe fn try_from<A: ArrowArrayRef>(array: A) -> Result<Box<dyn Array>> {
LargeUtf8 => Box::new(Utf8Array::<i64>::try_from_ffi(array)?),
Binary => Box::new(BinaryArray::<i32>::try_from_ffi(array)?),
LargeBinary => Box::new(BinaryArray::<i64>::try_from_ffi(array)?),
FixedSizeBinary => Box::new(FixedSizeBinaryArray::try_from_ffi(array)?),
List => Box::new(ListArray::<i32>::try_from_ffi(array)?),
LargeList => Box::new(ListArray::<i64>::try_from_ffi(array)?),
FixedSizeList => Box::new(FixedSizeListArray::try_from_ffi(array)?),
Struct => Box::new(StructArray::try_from_ffi(array)?),
Dictionary(key_type) => {
with_match_physical_dictionary_key_type!(key_type, |$T| {
Expand Down
31 changes: 27 additions & 4 deletions src/ffi/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,20 @@ fn buffer_offset(array: &Ffi_ArrowArray, data_type: &DataType, i: usize) -> usiz
// to fetch offset buffer's len to build the second buffer.
fn buffer_len(array: &Ffi_ArrowArray, data_type: &DataType, i: usize) -> Result<usize> {
Ok(match (data_type.to_physical_type(), i) {
(PhysicalType::FixedSizeBinary, 1) => {
if let DataType::FixedSizeBinary(size) = data_type.to_logical_type() {
*size * (array.offset as usize + array.length as usize)
} else {
unreachable!()
}
}
(PhysicalType::FixedSizeList, 1) => {
if let DataType::FixedSizeList(_, size) = data_type.to_logical_type() {
*size * (array.offset as usize + array.length as usize)
} else {
unreachable!()
}
}
(PhysicalType::Utf8, 1)
| (PhysicalType::LargeUtf8, 1)
| (PhysicalType::Binary, 1)
Expand Down Expand Up @@ -321,7 +335,7 @@ fn create_dictionary(
}
}

pub trait ArrowArrayRef {
pub trait ArrowArrayRef: std::fmt::Debug {
fn deallocation(&self) -> Deallocation {
Deallocation::Foreign(self.parent().clone())
}
Expand All @@ -344,12 +358,11 @@ pub trait ArrowArrayRef {
/// The caller must guarantee that the buffer `index` corresponds to a bitmap.
/// This function assumes that the bitmap created from FFI is valid; this is impossible to prove.
unsafe fn buffer<T: NativeType>(&self, index: usize) -> Result<Buffer<T>> {
// +1 to ignore null bitmap
create_buffer::<T>(
self.array(),
self.field().data_type(),
self.deallocation(),
index + 1,
index,
)
}

Expand All @@ -358,7 +371,7 @@ pub trait ArrowArrayRef {
/// This function assumes that the bitmap created from FFI is valid; this is impossible to prove.
unsafe fn bitmap(&self, index: usize) -> Result<Bitmap> {
// +1 to ignore null bitmap
create_bitmap(self.array(), self.deallocation(), index + 1)
create_bitmap(self.array(), self.deallocation(), index)
}

/// # Safety
Expand All @@ -371,6 +384,8 @@ pub trait ArrowArrayRef {
create_dictionary(self.array(), self.field(), self.parent().clone())
}

fn n_buffers(&self) -> usize;

fn parent(&self) -> &Arc<ArrowArray>;
fn array(&self) -> &Ffi_ArrowArray;
fn field(&self) -> &Field;
Expand Down Expand Up @@ -420,6 +435,10 @@ impl ArrowArrayRef for Arc<ArrowArray> {
fn array(&self) -> &Ffi_ArrowArray {
self.array.as_ref()
}

fn n_buffers(&self) -> usize {
self.array.n_buffers as usize
}
}

#[derive(Debug)]
Expand All @@ -442,6 +461,10 @@ impl<'a> ArrowArrayRef for ArrowArrayChild<'a> {
fn array(&self) -> &Ffi_ArrowArray {
self.array
}

fn n_buffers(&self) -> usize {
self.array.n_buffers as usize
}
}

impl<'a> ArrowArrayChild<'a> {
Expand Down
17 changes: 16 additions & 1 deletion src/ffi/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ impl Ffi_ArrowSchema {
DataType::List(field) => {
vec![Box::new(Ffi_ArrowSchema::new(field.as_ref()))]
}
DataType::FixedSizeList(field, _) => {
vec![Box::new(Ffi_ArrowSchema::new(field.as_ref()))]
}
DataType::LargeList(field) => {
vec![Box::new(Ffi_ArrowSchema::new(field.as_ref()))]
}
Expand Down Expand Up @@ -290,6 +293,17 @@ unsafe fn to_data_type(schema: &Ffi_ArrowSchema) -> Result<DataType> {
DataType::Timestamp(TimeUnit::Microsecond, Some(parts[1].to_string()))
} else if parts.len() == 2 && parts[0] == "tsn" {
DataType::Timestamp(TimeUnit::Nanosecond, Some(parts[1].to_string()))
} else if parts.len() == 2 && parts[0] == "w" {
let size = parts[1]
.parse::<usize>()
.map_err(|_| ArrowError::Ffi("size is not a valid integer".to_string()))?;
DataType::FixedSizeBinary(size)
} else if parts.len() == 2 && parts[0] == "+w" {
let size = parts[1]
.parse::<usize>()
.map_err(|_| ArrowError::Ffi("size is not a valid integer".to_string()))?;
let child = to_field(schema.child(0))?;
DataType::FixedSizeList(Box::new(child), size)
} else if parts.len() == 2 && parts[0] == "d" {
let parts = parts[1].split(',').collect::<Vec<_>>();
if parts.len() < 2 || parts.len() > 3 {
Expand Down Expand Up @@ -395,7 +409,7 @@ fn to_format(data_type: &DataType) -> String {
DataType::List(_) => "+l".to_string(),
DataType::LargeList(_) => "+L".to_string(),
DataType::Struct(_) => "+s".to_string(),
DataType::FixedSizeBinary(size) => format!("w{}", size),
DataType::FixedSizeBinary(size) => format!("w:{}", size),
DataType::FixedSizeList(_, size) => format!("+w:{}", size),
DataType::Union(f, ids, mode) => {
let sparsness = if mode.is_sparse() { 's' } else { 'd' };
Expand All @@ -419,6 +433,7 @@ fn to_format(data_type: &DataType) -> String {
pub(super) fn get_field_child(field: &Field, index: usize) -> Result<Field> {
match (index, field.data_type()) {
(0, DataType::List(field)) => Ok(field.as_ref().clone()),
(0, DataType::FixedSizeList(field, _)) => Ok(field.as_ref().clone()),
(0, DataType::LargeList(field)) => Ok(field.as_ref().clone()),
(0, DataType::Map(field, _)) => Ok(field.as_ref().clone()),
(index, DataType::Struct(fields)) => Ok(fields[index].clone()),
Expand Down
16 changes: 16 additions & 0 deletions tests/it/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,22 @@ fn list() -> Result<()> {
test_round_trip(array)
}

#[test]
fn fixed_list() -> Result<()> {
let data = vec![
Some(vec![Some(1i32), Some(2), Some(3)]),
None,
Some(vec![Some(4), None, Some(6)]),
];

let mut array = MutableFixedSizeListArray::new(MutablePrimitiveArray::<i32>::new(), 3);
array.try_extend(data)?;

let array: FixedSizeListArray = array.into();

test_round_trip(array)
}

#[test]
fn list_list() -> Result<()> {
let data = vec![
Expand Down