Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added ffi support for FixedSizeList and FixedSizeBinary
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Nov 2, 2021
1 parent e4c37f9 commit 599b8ac
Show file tree
Hide file tree
Showing 17 changed files with 154 additions and 28 deletions.
37 changes: 37 additions & 0 deletions arrow-pyarrow-integration-testing/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,14 @@ def test_string_sliced(self):
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

def test_fixed_binary(self):
a = pyarrow.array([b"aa", None, b"cc"], pyarrow.binary(2))
b = arrow_pyarrow_integration_testing.round_trip_array(a)

b.validate(full=True)
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

def test_decimal_roundtrip(self):
"""
Python -> Rust -> Python
Expand Down Expand Up @@ -179,6 +187,35 @@ def test_list_list_array(self):
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

def test_fixed_list(self):
"""
Python -> Rust -> Python
"""
a = pyarrow.array(
[None, [1, 2], [4, 5]],
pyarrow.list_(pyarrow.int64(), 2),
)
b = arrow_pyarrow_integration_testing.round_trip_array(a)

b.validate(full=True)
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

# same as https://issues.apache.org/jira/browse/ARROW-14383
def _test_fixed_list_sliced(self):
"""
Python -> Rust -> Python
"""
a = pyarrow.array(
[None, [1, 2], [4, 5]],
pyarrow.list_(pyarrow.int64(), 2),
).slice(1, 2)
b = arrow_pyarrow_integration_testing.round_trip_array(a)

b.validate(full=True)
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

def test_dict(self):
"""
Python -> Rust -> Python
Expand Down
4 changes: 2 additions & 2 deletions src/array/binary/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ impl<O: Offset, A: ffi::ArrowArrayRef> FromFfi<A> for BinaryArray<O> {
let data_type = array.field().data_type().clone();

let validity = unsafe { array.validity() }?;
let offsets = unsafe { array.buffer::<O>(0) }?;
let values = unsafe { array.buffer::<u8>(1) }?;
let offsets = unsafe { array.buffer::<O>(1) }?;
let values = unsafe { array.buffer::<u8>(2) }?;

Ok(Self::from_data_unchecked(
data_type, offsets, values, validity,
Expand Down
2 changes: 1 addition & 1 deletion src/array/boolean/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ impl<A: ffi::ArrowArrayRef> FromFfi<A> for BooleanArray {
unsafe fn try_from_ffi(array: A) -> Result<Self> {
let data_type = array.field().data_type().clone();
let validity = unsafe { array.validity() }?;
let values = unsafe { array.bitmap(0) }?;
let values = unsafe { array.bitmap(1) }?;
Ok(Self::from_data(data_type, values, validity))
}
}
2 changes: 1 addition & 1 deletion src/array/dictionary/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ impl<K: DictionaryKey, A: ffi::ArrowArrayRef> FromFfi<A> for DictionaryArray<K>
unsafe fn try_from_ffi(array: A) -> Result<Self> {
// keys: similar to PrimitiveArray, but the datatype is the inner one
let validity = unsafe { array.validity() }?;
let values = unsafe { array.buffer::<K>(0) }?;
let values = unsafe { array.buffer::<K>(1) }?;

let keys = PrimitiveArray::<K>::from_data(K::DATA_TYPE, values, validity);
let values = array.dictionary()?.unwrap();
Expand Down
2 changes: 1 addition & 1 deletion src/array/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ pub(crate) unsafe trait ToFfi {

/// Trait describing how a struct imports into itself from the
/// [C data interface](https://arrow.apache.org/docs/format/CDataInterface.html) (FFI).
pub trait FromFfi<T: ffi::ArrowArrayRef>: Sized {
pub(crate) trait FromFfi<T: ffi::ArrowArrayRef>: Sized {
/// Convert itself from FFI.
/// # Safety
/// This function is intrinsically `unsafe` as it requires the FFI to be made according
Expand Down
17 changes: 16 additions & 1 deletion src/array/fixed_size_binary/ffi.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
use crate::{array::ToFfi, bitmap::align};
use crate::{
array::{FromFfi, ToFfi},
bitmap::align,
error::Result,
ffi,
};

use super::FixedSizeBinaryArray;

Expand Down Expand Up @@ -42,3 +47,13 @@ unsafe impl ToFfi for FixedSizeBinaryArray {
}
}
}

impl<A: ffi::ArrowArrayRef> FromFfi<A> for FixedSizeBinaryArray {
unsafe fn try_from_ffi(array: A) -> Result<Self> {
let data_type = array.field().data_type().clone();
let validity = unsafe { array.validity() }?;
let values = unsafe { array.buffer::<u8>(1) }?;

Ok(Self::from_data(data_type, values, validity))
}
}
20 changes: 19 additions & 1 deletion src/array/fixed_size_list/ffi.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
use std::sync::Arc;

use super::super::{ffi::ToFfi, Array};
use super::FixedSizeListArray;
use crate::{
array::{
ffi::{FromFfi, ToFfi},
Array,
},
error::Result,
ffi,
};

unsafe impl ToFfi for FixedSizeListArray {
fn buffers(&self) -> Vec<Option<std::ptr::NonNull<u8>>> {
Expand All @@ -25,3 +32,14 @@ unsafe impl ToFfi for FixedSizeListArray {
self.clone()
}
}

impl<A: ffi::ArrowArrayRef> FromFfi<A> for FixedSizeListArray {
unsafe fn try_from_ffi(array: A) -> Result<Self> {
let data_type = array.field().data_type().clone();
let validity = unsafe { array.validity() }?;
let child = unsafe { array.child(0)? };
let values = ffi::try_from(child)?.into();

Ok(Self::from_data(data_type, values, validity))
}
}
2 changes: 1 addition & 1 deletion src/array/list/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ impl<O: Offset, A: ffi::ArrowArrayRef> FromFfi<A> for ListArray<O> {
unsafe fn try_from_ffi(array: A) -> Result<Self> {
let data_type = array.field().data_type().clone();
let validity = unsafe { array.validity() }?;
let offsets = unsafe { array.buffer::<O>(0) }?;
let offsets = unsafe { array.buffer::<O>(1) }?;
let child = unsafe { array.child(0)? };
let values = ffi::try_from(child)?.into();

Expand Down
18 changes: 9 additions & 9 deletions src/array/list/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,6 @@ impl<O: Offset, M: MutableArray + Default> MutableListArray<O, M> {
validity: None,
}
}

/// Shrinks the capacity of the [`MutableListArray`] to fit its current length.
pub fn shrink_to_fit(&mut self) {
self.values.shrink_to_fit();
self.offsets.shrink_to_fit();
if let Some(validity) = &mut self.validity {
validity.shrink_to_fit()
}
}
}

impl<O: Offset, M: MutableArray + Default> Default for MutableListArray<O, M> {
Expand Down Expand Up @@ -188,6 +179,15 @@ impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
let a: ListArray<O> = self.into();
Arc::new(a)
}

/// Shrinks the capacity of the [`MutableListArray`] to fit its current length.
pub fn shrink_to_fit(&mut self) {
self.values.shrink_to_fit();
self.offsets.shrink_to_fit();
if let Some(validity) = &mut self.validity {
validity.shrink_to_fit()
}
}
}

impl<O: Offset, M: MutableArray + Default + 'static> MutableArray for MutableListArray<O, M> {
Expand Down
2 changes: 1 addition & 1 deletion src/array/map/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ impl<A: ffi::ArrowArrayRef> FromFfi<A> for MapArray {
unsafe fn try_from_ffi(array: A) -> Result<Self> {
let data_type = array.field().data_type().clone();
let validity = unsafe { array.validity() }?;
let offsets = unsafe { array.buffer::<i32>(0) }?;
let offsets = unsafe { array.buffer::<i32>(1) }?;
let child = array.child(0)?;
let values = ffi::try_from(child)?.into();

Expand Down
2 changes: 1 addition & 1 deletion src/array/primitive/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ impl<T: NativeType, A: ffi::ArrowArrayRef> FromFfi<A> for PrimitiveArray<T> {
unsafe fn try_from_ffi(array: A) -> Result<Self> {
let data_type = array.field().data_type().clone();
let validity = unsafe { array.validity() }?;
let values = unsafe { array.buffer::<T>(0) }?;
let values = unsafe { array.buffer::<T>(1) }?;

Ok(Self::from_data(data_type, values, validity))
}
Expand Down
4 changes: 2 additions & 2 deletions src/array/union/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ impl<A: ffi::ArrowArrayRef> FromFfi<A> for UnionArray {
let data_type = field.data_type().clone();
let fields = Self::get_fields(field.data_type());

let mut types = unsafe { array.buffer::<i8>(0) }?;
let mut types = unsafe { array.buffer::<i8>(1) }?;
let offsets = if Self::is_sparse(&data_type) {
None
} else {
Some(unsafe { array.buffer::<i32>(1) }?)
Some(unsafe { array.buffer::<i32>(2) }?)
};

let length = array.array().len();
Expand Down
4 changes: 2 additions & 2 deletions src/array/utf8/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ impl<O: Offset, A: ffi::ArrowArrayRef> FromFfi<A> for Utf8Array<O> {
unsafe fn try_from_ffi(array: A) -> Result<Self> {
let data_type = array.field().data_type().clone();
let validity = unsafe { array.validity() }?;
let offsets = unsafe { array.buffer::<O>(0) }?;
let values = unsafe { array.buffer::<u8>(1)? };
let offsets = unsafe { array.buffer::<O>(1) }?;
let values = unsafe { array.buffer::<u8>(2)? };

Ok(Self::from_data_unchecked(
data_type, offsets, values, validity,
Expand Down
2 changes: 2 additions & 0 deletions src/ffi/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ pub unsafe fn try_from<A: ArrowArrayRef>(array: A) -> Result<Box<dyn Array>> {
LargeUtf8 => Box::new(Utf8Array::<i64>::try_from_ffi(array)?),
Binary => Box::new(BinaryArray::<i32>::try_from_ffi(array)?),
LargeBinary => Box::new(BinaryArray::<i64>::try_from_ffi(array)?),
FixedSizeBinary => Box::new(FixedSizeBinaryArray::try_from_ffi(array)?),
List => Box::new(ListArray::<i32>::try_from_ffi(array)?),
LargeList => Box::new(ListArray::<i64>::try_from_ffi(array)?),
FixedSizeList => Box::new(FixedSizeListArray::try_from_ffi(array)?),
Struct => Box::new(StructArray::try_from_ffi(array)?),
Dictionary(key_type) => {
with_match_physical_dictionary_key_type!(key_type, |$T| {
Expand Down
31 changes: 27 additions & 4 deletions src/ffi/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,20 @@ fn buffer_offset(array: &Ffi_ArrowArray, data_type: &DataType, i: usize) -> usiz
// to fetch offset buffer's len to build the second buffer.
fn buffer_len(array: &Ffi_ArrowArray, data_type: &DataType, i: usize) -> Result<usize> {
Ok(match (data_type.to_physical_type(), i) {
(PhysicalType::FixedSizeBinary, 1) => {
if let DataType::FixedSizeBinary(size) = data_type.to_logical_type() {
*size * (array.offset as usize + array.length as usize)
} else {
unreachable!()
}
}
(PhysicalType::FixedSizeList, 1) => {
if let DataType::FixedSizeList(_, size) = data_type.to_logical_type() {
*size * (array.offset as usize + array.length as usize)
} else {
unreachable!()
}
}
(PhysicalType::Utf8, 1)
| (PhysicalType::LargeUtf8, 1)
| (PhysicalType::Binary, 1)
Expand Down Expand Up @@ -321,7 +335,7 @@ fn create_dictionary(
}
}

pub trait ArrowArrayRef {
pub trait ArrowArrayRef: std::fmt::Debug {
fn deallocation(&self) -> Deallocation {
Deallocation::Foreign(self.parent().clone())
}
Expand All @@ -344,12 +358,11 @@ pub trait ArrowArrayRef {
/// The caller must guarantee that the buffer `index` corresponds to a bitmap.
/// This function assumes that the bitmap created from FFI is valid; this is impossible to prove.
unsafe fn buffer<T: NativeType>(&self, index: usize) -> Result<Buffer<T>> {
// +1 to ignore null bitmap
create_buffer::<T>(
self.array(),
self.field().data_type(),
self.deallocation(),
index + 1,
index,
)
}

Expand All @@ -358,7 +371,7 @@ pub trait ArrowArrayRef {
/// This function assumes that the bitmap created from FFI is valid; this is impossible to prove.
unsafe fn bitmap(&self, index: usize) -> Result<Bitmap> {
// +1 to ignore null bitmap
create_bitmap(self.array(), self.deallocation(), index + 1)
create_bitmap(self.array(), self.deallocation(), index)
}

/// # Safety
Expand All @@ -371,6 +384,8 @@ pub trait ArrowArrayRef {
create_dictionary(self.array(), self.field(), self.parent().clone())
}

fn n_buffers(&self) -> usize;

fn parent(&self) -> &Arc<ArrowArray>;
fn array(&self) -> &Ffi_ArrowArray;
fn field(&self) -> &Field;
Expand Down Expand Up @@ -420,6 +435,10 @@ impl ArrowArrayRef for Arc<ArrowArray> {
fn array(&self) -> &Ffi_ArrowArray {
self.array.as_ref()
}

fn n_buffers(&self) -> usize {
self.array.n_buffers as usize
}
}

#[derive(Debug)]
Expand All @@ -442,6 +461,10 @@ impl<'a> ArrowArrayRef for ArrowArrayChild<'a> {
fn array(&self) -> &Ffi_ArrowArray {
self.array
}

fn n_buffers(&self) -> usize {
self.array.n_buffers as usize
}
}

impl<'a> ArrowArrayChild<'a> {
Expand Down
17 changes: 16 additions & 1 deletion src/ffi/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ impl Ffi_ArrowSchema {
DataType::List(field) => {
vec![Box::new(Ffi_ArrowSchema::new(field.as_ref()))]
}
DataType::FixedSizeList(field, _) => {
vec![Box::new(Ffi_ArrowSchema::new(field.as_ref()))]
}
DataType::LargeList(field) => {
vec![Box::new(Ffi_ArrowSchema::new(field.as_ref()))]
}
Expand Down Expand Up @@ -290,6 +293,17 @@ unsafe fn to_data_type(schema: &Ffi_ArrowSchema) -> Result<DataType> {
DataType::Timestamp(TimeUnit::Microsecond, Some(parts[1].to_string()))
} else if parts.len() == 2 && parts[0] == "tsn" {
DataType::Timestamp(TimeUnit::Nanosecond, Some(parts[1].to_string()))
} else if parts.len() == 2 && parts[0] == "w" {
let size = parts[1]
.parse::<usize>()
.map_err(|_| ArrowError::Ffi("size is not a valid integer".to_string()))?;
DataType::FixedSizeBinary(size)
} else if parts.len() == 2 && parts[0] == "+w" {
let size = parts[1]
.parse::<usize>()
.map_err(|_| ArrowError::Ffi("size is not a valid integer".to_string()))?;
let child = to_field(schema.child(0))?;
DataType::FixedSizeList(Box::new(child), size)
} else if parts.len() == 2 && parts[0] == "d" {
let parts = parts[1].split(',').collect::<Vec<_>>();
if parts.len() < 2 || parts.len() > 3 {
Expand Down Expand Up @@ -395,7 +409,7 @@ fn to_format(data_type: &DataType) -> String {
DataType::List(_) => "+l".to_string(),
DataType::LargeList(_) => "+L".to_string(),
DataType::Struct(_) => "+s".to_string(),
DataType::FixedSizeBinary(size) => format!("w{}", size),
DataType::FixedSizeBinary(size) => format!("w:{}", size),
DataType::FixedSizeList(_, size) => format!("+w:{}", size),
DataType::Union(f, ids, mode) => {
let sparsness = if mode.is_sparse() { 's' } else { 'd' };
Expand All @@ -419,6 +433,7 @@ fn to_format(data_type: &DataType) -> String {
pub(super) fn get_field_child(field: &Field, index: usize) -> Result<Field> {
match (index, field.data_type()) {
(0, DataType::List(field)) => Ok(field.as_ref().clone()),
(0, DataType::FixedSizeList(field, _)) => Ok(field.as_ref().clone()),
(0, DataType::LargeList(field)) => Ok(field.as_ref().clone()),
(0, DataType::Map(field, _)) => Ok(field.as_ref().clone()),
(index, DataType::Struct(fields)) => Ok(fields[index].clone()),
Expand Down
16 changes: 16 additions & 0 deletions tests/it/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,22 @@ fn list() -> Result<()> {
test_round_trip(array)
}

#[test]
fn fixed_list() -> Result<()> {
let data = vec![
Some(vec![Some(1i32), Some(2), Some(3)]),
None,
Some(vec![Some(4), None, Some(6)]),
];

let mut array = MutableFixedSizeListArray::new(MutablePrimitiveArray::<i32>::new(), 3);
array.try_extend(data)?;

let array: FixedSizeListArray = array.into();

test_round_trip(array)
}

#[test]
fn list_list() -> Result<()> {
let data = vec![
Expand Down

0 comments on commit 599b8ac

Please sign in to comment.