From e51c9182fb0e9812eb215f9eed0fe6feabd9e181 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Mon, 1 Nov 2021 16:58:13 +0000 Subject: [PATCH] Fixed --- src/array/binary/ffi.rs | 23 +++-------------------- src/array/boolean/ffi.rs | 13 ++----------- src/array/dictionary/ffi.rs | 11 ++--------- src/array/list/ffi.rs | 12 +++--------- src/array/map/ffi.rs | 21 ++++++++++++--------- src/array/primitive/ffi.rs | 10 ++-------- src/array/struct_.rs | 13 ++++--------- src/array/union/ffi.rs | 4 ++-- src/array/utf8/ffi.rs | 12 +++--------- src/ffi/ffi.rs | 20 ++++++++++---------- 10 files changed, 43 insertions(+), 96 deletions(-) diff --git a/src/array/binary/ffi.rs b/src/array/binary/ffi.rs index 2aa39d3412a..2e4e94f3858 100644 --- a/src/array/binary/ffi.rs +++ b/src/array/binary/ffi.rs @@ -1,7 +1,6 @@ use crate::{ array::{FromFfi, Offset, ToFfi}, bitmap::align, - datatypes::DataType, ffi, }; @@ -54,29 +53,13 @@ unsafe impl ToFfi for BinaryArray { impl FromFfi for BinaryArray { unsafe fn try_from_ffi(array: A) -> Result { let data_type = array.field().data_type().clone(); - let expected = if O::is_large() { - DataType::LargeBinary - } else { - DataType::Binary - }; - assert_eq!(data_type, expected); - let length = array.array().len(); - let offset = array.array().offset(); - let mut validity = unsafe { array.validity() }?; - let mut offsets = unsafe { array.buffer::(0) }?; + let validity = unsafe { array.validity() }?; + let offsets = unsafe { array.buffer::(0) }?; let values = unsafe { array.buffer::(1) }?; - if offset > 0 { - offsets = offsets.slice(offset, length); - validity = validity.map(|x| x.slice(offset, length)) - } - Ok(Self::from_data_unchecked( - Self::default_data_type(), - offsets, - values, - validity, + data_type, offsets, values, validity, )) } } diff --git a/src/array/boolean/ffi.rs b/src/array/boolean/ffi.rs index a396a466eeb..c042d538aab 100644 --- a/src/array/boolean/ffi.rs +++ b/src/array/boolean/ffi.rs @@ -1,7 +1,6 @@ use crate::{ array::{FromFfi, ToFfi}, bitmap::align, - datatypes::DataType, ffi, }; @@ -52,16 +51,8 @@ unsafe impl ToFfi for BooleanArray { impl FromFfi for BooleanArray { unsafe fn try_from_ffi(array: A) -> Result { let data_type = array.field().data_type().clone(); - assert_eq!(data_type, DataType::Boolean); - let length = array.array().len(); - let offset = array.array().offset(); - let mut validity = unsafe { array.validity() }?; - let mut values = unsafe { array.bitmap(0) }?; - - if offset > 0 { - values = values.slice(offset, length); - validity = validity.map(|x| x.slice(offset, length)) - } + let validity = unsafe { array.validity() }?; + let values = unsafe { array.bitmap(0) }?; Ok(Self::from_data(data_type, values, validity)) } } diff --git a/src/array/dictionary/ffi.rs b/src/array/dictionary/ffi.rs index 6c97539f402..a32d88b3c87 100644 --- a/src/array/dictionary/ffi.rs +++ b/src/array/dictionary/ffi.rs @@ -27,17 +27,10 @@ unsafe impl ToFfi for DictionaryArray { impl FromFfi for DictionaryArray { unsafe fn try_from_ffi(array: A) -> Result { // keys: similar to PrimitiveArray, but the datatype is the inner one - let length = array.array().len(); - let offset = array.array().offset(); - let mut validity = unsafe { array.validity() }?; - let mut values = unsafe { array.buffer::(0) }?; + let validity = unsafe { array.validity() }?; + let values = unsafe { array.buffer::(0) }?; - if offset > 0 { - values = values.slice(offset, length); - validity = validity.map(|x| x.slice(offset, length)) - } let keys = PrimitiveArray::::from_data(K::DATA_TYPE, values, validity); - // values let values = array.dictionary()?.unwrap(); let values = ffi::try_from(values)?.into(); diff --git a/src/array/list/ffi.rs b/src/array/list/ffi.rs index 3168242228a..1f511730ac0 100644 --- a/src/array/list/ffi.rs +++ b/src/array/list/ffi.rs @@ -53,17 +53,11 @@ unsafe impl ToFfi for ListArray { impl FromFfi for ListArray { unsafe fn try_from_ffi(array: A) -> Result { let data_type = array.field().data_type().clone(); - let length = array.array().len(); - let offset = array.array().offset(); - let mut validity = unsafe { array.validity() }?; - let mut offsets = unsafe { array.buffer::(0) }?; - let child = array.child(0)?; + let validity = unsafe { array.validity() }?; + let offsets = unsafe { array.buffer::(0) }?; + let child = unsafe { array.child(0)? }; let values = ffi::try_from(child)?.into(); - if offset > 0 { - offsets = offsets.slice(offset, length); - validity = validity.map(|x| x.slice(offset, length)) - } Ok(Self::from_data(data_type, offsets, values, validity)) } } diff --git a/src/array/map/ffi.rs b/src/array/map/ffi.rs index f87f473d5f8..12a56f5251d 100644 --- a/src/array/map/ffi.rs +++ b/src/array/map/ffi.rs @@ -18,7 +18,16 @@ unsafe impl ToFfi for MapArray { } fn offset(&self) -> Option { - todo!() + let offset = self.offsets.offset(); + if let Some(bitmap) = self.validity.as_ref() { + if bitmap.offset() == offset { + Some(offset) + } else { + None + } + } else { + Some(offset) + } } fn to_ffi_aligned(&self) -> Self { @@ -44,17 +53,11 @@ unsafe impl ToFfi for MapArray { impl FromFfi for MapArray { unsafe fn try_from_ffi(array: A) -> Result { let data_type = array.field().data_type().clone(); - let length = array.array().len(); - let offset = array.array().offset(); - let mut validity = unsafe { array.validity() }?; - let mut offsets = unsafe { array.buffer::(0) }?; + let validity = unsafe { array.validity() }?; + let offsets = unsafe { array.buffer::(0) }?; let child = array.child(0)?; let values = ffi::try_from(child)?.into(); - if offset > 0 { - offsets = offsets.slice(offset, length); - validity = validity.map(|x| x.slice(offset, length)) - } Ok(Self::from_data(data_type, offsets, values, validity)) } } diff --git a/src/array/primitive/ffi.rs b/src/array/primitive/ffi.rs index 5eef467d06a..0e8b3e74a10 100644 --- a/src/array/primitive/ffi.rs +++ b/src/array/primitive/ffi.rs @@ -52,15 +52,9 @@ unsafe impl ToFfi for PrimitiveArray { impl FromFfi for PrimitiveArray { unsafe fn try_from_ffi(array: A) -> Result { let data_type = array.field().data_type().clone(); - let length = array.array().len(); - let offset = array.array().offset(); - let mut validity = unsafe { array.validity() }?; - let mut values = unsafe { array.buffer::(0) }?; + let validity = unsafe { array.validity() }?; + let values = unsafe { array.buffer::(0) }?; - if offset > 0 { - values = values.slice(offset, length); - validity = validity.map(|x| x.slice(offset, length)) - } Ok(Self::from_data(data_type, values, validity)) } } diff --git a/src/array/struct_.rs b/src/array/struct_.rs index 00cb1042ad2..6e767ae712f 100644 --- a/src/array/struct_.rs +++ b/src/array/struct_.rs @@ -243,12 +243,10 @@ unsafe impl ToFfi for StructArray { impl FromFfi for StructArray { unsafe fn try_from_ffi(array: A) -> Result { - let field = array.field(); - let fields = Self::get_fields(field.data_type()).to_vec(); + let data_type = array.field().data_type().clone(); + let fields = Self::get_fields(&data_type); - let length = array.array().len(); - let offset = array.array().offset(); - let mut validity = unsafe { array.validity() }?; + let validity = unsafe { array.validity() }?; let values = (0..fields.len()) .map(|index| { let child = array.child(index)?; @@ -256,9 +254,6 @@ impl FromFfi for StructArray { }) .collect::>>>()?; - if offset > 0 { - validity = validity.map(|x| x.slice(offset, length)) - } - Ok(Self::from_data(DataType::Struct(fields), values, validity)) + Ok(Self::from_data(data_type, values, validity)) } } diff --git a/src/array/union/ffi.rs b/src/array/union/ffi.rs index 6fc8ee99f2e..fbab5dd1596 100644 --- a/src/array/union/ffi.rs +++ b/src/array/union/ffi.rs @@ -23,11 +23,11 @@ unsafe impl ToFfi for UnionArray { } fn offset(&self) -> Option { - todo!() + Some(self.types.offset()) } fn to_ffi_aligned(&self) -> Self { - todo!() + self.clone() } } diff --git a/src/array/utf8/ffi.rs b/src/array/utf8/ffi.rs index c2c03fa4573..59b1a60ce95 100644 --- a/src/array/utf8/ffi.rs +++ b/src/array/utf8/ffi.rs @@ -51,17 +51,11 @@ unsafe impl ToFfi for Utf8Array { impl FromFfi for Utf8Array { unsafe fn try_from_ffi(array: A) -> Result { - let length = array.array().len(); - let offset = array.array().offset(); - let mut validity = unsafe { array.validity() }?; - let mut offsets = unsafe { array.buffer::(0) }?; + let data_type = array.field().data_type().clone(); + let validity = unsafe { array.validity() }?; + let offsets = unsafe { array.buffer::(0) }?; let values = unsafe { array.buffer::(1)? }; - if offset > 0 { - offsets = offsets.slice(offset, length); - validity = validity.map(|x| x.slice(offset, length)) - } - let data_type = Self::default_data_type(); Ok(Self::from_data_unchecked( data_type, offsets, values, validity, )) diff --git a/src/ffi/ffi.rs b/src/ffi/ffi.rs index 97e5fa54cc1..33ffadc7245 100644 --- a/src/ffi/ffi.rs +++ b/src/ffi/ffi.rs @@ -183,7 +183,6 @@ unsafe fn create_buffer( deallocation: Deallocation, index: usize, ) -> Result> { - println!("{:#?}", array); if array.buffers.is_null() { return Err(ArrowError::Ffi("The array buffers are null".to_string())); } @@ -195,12 +194,12 @@ unsafe fn create_buffer( let ptr = NonNull::new(ptr as *mut T); let len = buffer_len(array, data_type, index)?; + let offset = buffer_offset(array, data_type, index); let bytes = ptr .map(|ptr| Bytes::new(ptr, len, deallocation)) - .ok_or_else(|| ArrowError::Ffi(format!("The buffer at position {} is null", index))); + .ok_or_else(|| ArrowError::Ffi(format!("The buffer at position {} is null", index)))?; - println!("{:?}", bytes); - bytes.map(Buffer::from_bytes) + Ok(Buffer::from_bytes(bytes).slice(offset, len - offset)) } /// returns a new buffer corresponding to the index `i` of the FFI array. It may not exist (null pointer). @@ -236,7 +235,7 @@ unsafe fn create_bitmap( )) })?; - Ok(Bitmap::from_bytes(bytes, offset + len)) + Ok(Bitmap::from_bytes(bytes, offset + len).slice(offset, len)) } fn buffer_offset(array: &Ffi_ArrowArray, data_type: &DataType, i: usize) -> usize { @@ -266,24 +265,23 @@ fn buffer_len(array: &Ffi_ArrowArray, data_type: &DataType, i: usize) -> Result< (PhysicalType::Utf8, 2) | (PhysicalType::Binary, 2) => { // the len of the data buffer (buffer 2) equals the last value of the offset buffer (buffer 1) let len = buffer_len(array, data_type, 1)?; - let offset = buffer_offset(array, data_type, 1); // first buffer is the null buffer => add(1) let offset_buffer = unsafe { *(array.buffers as *mut *const u8).add(1) }; // interpret as i32 let offset_buffer = offset_buffer as *const i32; // get last offset - (unsafe { *offset_buffer.add(offset + len - 1) }) as usize + + (unsafe { *offset_buffer.add(len - 1) }) as usize } (PhysicalType::LargeUtf8, 2) | (PhysicalType::LargeBinary, 2) => { // the len of the data buffer (buffer 2) equals the last value of the offset buffer (buffer 1) let len = buffer_len(array, data_type, 1)?; - let offset = buffer_offset(array, data_type, 1); // first buffer is the null buffer => add(1) let offset_buffer = unsafe { *(array.buffers as *mut *const u8).add(1) }; // interpret as i64 let offset_buffer = offset_buffer as *const i64; // get last offset - (unsafe { *offset_buffer.add(offset + len - 1) }) as usize + (unsafe { *offset_buffer.add(len - 1) }) as usize } // buffer len of primitive types _ => array.offset as usize + array.length as usize, @@ -363,7 +361,9 @@ pub trait ArrowArrayRef { create_bitmap(self.array(), self.deallocation(), index + 1) } - fn child(&self, index: usize) -> Result { + /// # Safety + /// The caller must guarantee that the child `index` is valid per c data interface. + unsafe fn child(&self, index: usize) -> Result { create_child(self.array(), self.field(), self.parent().clone(), index) }