Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into primitive-nulls
Browse files Browse the repository at this point in the history
  • Loading branch information
jayzhan211 committed Apr 21, 2024
2 parents 85bfbce + cd33319 commit 32cb85e
Show file tree
Hide file tree
Showing 82 changed files with 4,218 additions and 848 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ jobs:
rm website/build/artifact.tar
cp .asf.yaml ./website/build/.asf.yaml
- name: Deploy to gh-pages
uses: peaceiris/actions-gh-pages@v3.9.3
uses: peaceiris/actions-gh-pages@v4.0.0
if: github.event_name == 'push' && github.ref_name == 'master'
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
Expand Down
3 changes: 2 additions & 1 deletion arrow-arith/src/arity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use arrow_array::builder::BufferBuilder;
use arrow_array::types::ArrowDictionaryKeyType;
use arrow_array::*;
use arrow_buffer::buffer::NullBuffer;
use arrow_buffer::ArrowNativeType;
use arrow_buffer::{Buffer, MutableBuffer};
use arrow_data::ArrayData;
use arrow_schema::ArrowError;
Expand Down Expand Up @@ -386,7 +387,7 @@ where
O: ArrowPrimitiveType,
F: Fn(A::Item, B::Item) -> Result<O::Native, ArrowError>,
{
let mut buffer = MutableBuffer::new(len * O::get_byte_width());
let mut buffer = MutableBuffer::new(len * O::Native::get_byte_width());
for idx in 0..len {
unsafe {
buffer.push_unchecked(op(a.value_unchecked(idx), b.value_unchecked(idx))?);
Expand Down
97 changes: 93 additions & 4 deletions arrow-array/src/array/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,18 @@ impl<T: ByteViewType + ?Sized> From<GenericByteViewArray<T>> for ArrayData {
}
}

impl<'a, Ptr, T> FromIterator<&'a Option<Ptr>> for GenericByteViewArray<T>
where
Ptr: AsRef<T::Native> + 'a,
T: ByteViewType + ?Sized,
{
fn from_iter<I: IntoIterator<Item = &'a Option<Ptr>>>(iter: I) -> Self {
iter.into_iter()
.map(|o| o.as_ref().map(|p| p.as_ref()))
.collect()
}
}

impl<Ptr, T: ByteViewType + ?Sized> FromIterator<Option<Ptr>> for GenericByteViewArray<T>
where
Ptr: AsRef<T::Native>,
Expand All @@ -400,7 +412,23 @@ where
/// ```
pub type BinaryViewArray = GenericByteViewArray<BinaryViewType>;

/// A [`GenericByteViewArray`] that stores uf8 data
impl BinaryViewArray {
/// Convert the [`BinaryViewArray`] to [`StringViewArray`]
/// If items not utf8 data, validate will fail and error returned.
pub fn to_string_view(self) -> Result<StringViewArray, ArrowError> {
StringViewType::validate(self.views(), self.data_buffers())?;
unsafe { Ok(self.to_string_view_unchecked()) }
}

/// Convert the [`BinaryViewArray`] to [`StringViewArray`]
/// # Safety
/// Caller is responsible for ensuring that items in array are utf8 data.
pub unsafe fn to_string_view_unchecked(self) -> StringViewArray {
StringViewArray::new_unchecked(self.views, self.buffers, self.nulls)
}
}

/// A [`GenericByteViewArray`] that stores utf8 data
///
/// # Example
/// ```
Expand All @@ -411,21 +439,46 @@ pub type BinaryViewArray = GenericByteViewArray<BinaryViewType>;
/// ```
pub type StringViewArray = GenericByteViewArray<StringViewType>;

impl StringViewArray {
/// Convert the [`StringViewArray`] to [`BinaryViewArray`]
pub fn to_binary_view(self) -> BinaryViewArray {
unsafe { BinaryViewArray::new_unchecked(self.views, self.buffers, self.nulls) }
}
}

impl From<Vec<&str>> for StringViewArray {
fn from(v: Vec<&str>) -> Self {
Self::from_iter_values(v)
}
}

impl From<Vec<Option<&str>>> for StringViewArray {
fn from(v: Vec<Option<&str>>) -> Self {
v.into_iter().collect()
}
}

impl From<Vec<String>> for StringViewArray {
fn from(v: Vec<String>) -> Self {
Self::from_iter_values(v)
}
}

impl From<Vec<Option<String>>> for StringViewArray {
fn from(v: Vec<Option<String>>) -> Self {
v.into_iter().collect()
}
}

#[cfg(test)]
mod tests {
use crate::builder::StringViewBuilder;
use crate::builder::{BinaryViewBuilder, StringViewBuilder};
use crate::{Array, BinaryViewArray, StringViewArray};
use arrow_buffer::{Buffer, ScalarBuffer};
use arrow_data::ByteView;

#[test]
fn try_new() {
fn try_new_string() {
let array = StringViewArray::from_iter_values(vec![
"hello",
"world",
Expand All @@ -434,7 +487,10 @@ mod tests {
]);
assert_eq!(array.value(0), "hello");
assert_eq!(array.value(3), "large payload over 12 bytes");
}

#[test]
fn try_new_binary() {
let array = BinaryViewArray::from_iter_values(vec![
b"hello".as_slice(),
b"world".as_slice(),
Expand All @@ -443,14 +499,30 @@ mod tests {
]);
assert_eq!(array.value(0), b"hello");
assert_eq!(array.value(3), b"large payload over 12 bytes");
}

#[test]
fn try_new_empty_string() {
// test empty array
let array = {
let mut builder = StringViewBuilder::new();
builder.finish()
};
assert!(array.is_empty());
}

#[test]
fn try_new_empty_binary() {
// test empty array
let array = {
let mut builder = BinaryViewBuilder::new();
builder.finish()
};
assert!(array.is_empty());
}

#[test]
fn test_append_string() {
// test builder append
let array = {
let mut builder = StringViewBuilder::new();
Expand All @@ -462,8 +534,25 @@ mod tests {
assert_eq!(array.value(0), "hello");
assert!(array.is_null(1));
assert_eq!(array.value(2), "large payload over 12 bytes");
}

#[test]
fn test_append_binary() {
// test builder append
let array = {
let mut builder = BinaryViewBuilder::new();
builder.append_value(b"hello");
builder.append_null();
builder.append_option(Some(b"large payload over 12 bytes"));
builder.finish()
};
assert_eq!(array.value(0), b"hello");
assert!(array.is_null(1));
assert_eq!(array.value(2), b"large payload over 12 bytes");
}

// test builder's in_progress re-created
#[test]
fn test_in_progress_recreation() {
let array = {
// make a builder with small block size.
let mut builder = StringViewBuilder::new().with_block_size(14);
Expand Down
37 changes: 26 additions & 11 deletions arrow-array/src/array/fixed_size_list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,16 +152,22 @@ impl FixedSizeListArray {
ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {}", size))
})?;

let len = values.len() / s.max(1);
if let Some(n) = nulls.as_ref() {
if n.len() != len {
return Err(ArrowError::InvalidArgumentError(format!(
"Incorrect length of null buffer for FixedSizeListArray, expected {} got {}",
len,
n.len(),
)));
let len = match s {
0 => nulls.as_ref().map(|x| x.len()).unwrap_or_default(),
_ => {
let len = values.len() / s.max(1);
if let Some(n) = nulls.as_ref() {
if n.len() != len {
return Err(ArrowError::InvalidArgumentError(format!(
"Incorrect length of null buffer for FixedSizeListArray, expected {} got {}",
len,
n.len(),
)));
}
}
len
}
}
};

if field.data_type() != values.data_type() {
return Err(ArrowError::InvalidArgumentError(format!(
Expand Down Expand Up @@ -460,7 +466,7 @@ mod tests {

use crate::cast::AsArray;
use crate::types::Int32Type;
use crate::Int32Array;
use crate::{new_empty_array, Int32Array};

use super::*;

Expand Down Expand Up @@ -656,7 +662,7 @@ mod tests {
);

let list = FixedSizeListArray::new(field.clone(), 0, values.clone(), None);
assert_eq!(list.len(), 6);
assert_eq!(list.len(), 0);

let nulls = NullBuffer::new_null(2);
let err = FixedSizeListArray::try_new(field, 2, values.clone(), Some(nulls)).unwrap_err();
Expand All @@ -674,4 +680,13 @@ mod tests {
let err = FixedSizeListArray::try_new(field, 2, values, None).unwrap_err();
assert_eq!(err.to_string(), "Invalid argument error: FixedSizeListArray expected data type Int64 got Int32 for \"item\"");
}

#[test]
fn empty_fixed_size_list() {
let field = Arc::new(Field::new("item", DataType::Int32, true));
let nulls = NullBuffer::new_null(2);
let values = new_empty_array(&DataType::Int32);
let list = FixedSizeListArray::new(field.clone(), 0, values, Some(nulls));
assert_eq!(list.len(), 2);
}
}
6 changes: 3 additions & 3 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1097,7 +1097,7 @@ impl<T: ArrowPrimitiveType> std::fmt::Debug for PrimitiveArray<T> {
write!(f, "PrimitiveArray<{data_type:?}>\n[\n")?;
print_long_array(self, f, |array, index, f| match data_type {
DataType::Date32 | DataType::Date64 => {
let v = self.value(index).to_isize().unwrap() as i64;
let v = self.value(index).to_i64().unwrap();
match as_date::<T>(v) {
Some(date) => write!(f, "{date:?}"),
None => {
Expand All @@ -1109,7 +1109,7 @@ impl<T: ArrowPrimitiveType> std::fmt::Debug for PrimitiveArray<T> {
}
}
DataType::Time32(_) | DataType::Time64(_) => {
let v = self.value(index).to_isize().unwrap() as i64;
let v = self.value(index).to_i64().unwrap();
match as_time::<T>(v) {
Some(time) => write!(f, "{time:?}"),
None => {
Expand All @@ -1121,7 +1121,7 @@ impl<T: ArrowPrimitiveType> std::fmt::Debug for PrimitiveArray<T> {
}
}
DataType::Timestamp(_, tz_string_opt) => {
let v = self.value(index).to_isize().unwrap() as i64;
let v = self.value(index).to_i64().unwrap();
match tz_string_opt {
// for Timestamp with TimeZone
Some(tz_string) => {
Expand Down
Loading

0 comments on commit 32cb85e

Please sign in to comment.