Skip to content

Commit

Permalink
feat: implemented with_field() for FixedSizeListBuilder (#5541)
Browse files Browse the repository at this point in the history
* feat: implemented with_field() for FixedSizeListBuilder

* Switched back to build_unchecked, assertions on array_data

* decresed code repetition

* Fixed null logic

* Added unit tests for empty with field

* Fixed clippy issue
  • Loading branch information
istvan-fodor authored Apr 9, 2024
1 parent 51c1b4b commit 1b3d1a9
Showing 1 changed file with 249 additions and 33 deletions.
282 changes: 249 additions & 33 deletions arrow-array/src/builder/fixed_size_list_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use crate::builder::ArrayBuilder;
use crate::{ArrayRef, FixedSizeListArray};
use arrow_buffer::NullBufferBuilder;
use arrow_data::ArrayData;
use arrow_schema::{DataType, Field};
use arrow_schema::{DataType, Field, FieldRef};
use std::any::Any;
use std::sync::Arc;

Expand Down Expand Up @@ -67,6 +67,7 @@ pub struct FixedSizeListBuilder<T: ArrayBuilder> {
null_buffer_builder: NullBufferBuilder,
values_builder: T,
list_len: i32,
field: Option<FieldRef>,
}

impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
Expand All @@ -89,6 +90,20 @@ impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
null_buffer_builder: NullBufferBuilder::new(capacity),
values_builder,
list_len: value_length,
field: None,
}
}

/// Override the field passed to [`ArrayData::builder`]
///
/// By default a nullable field is created with the name `item`
///
/// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the
/// field's data type does not match that of `T`
pub fn with_field(self, field: impl Into<FieldRef>) -> Self {
Self {
field: Some(field.into()),
..self
}
}
}
Expand Down Expand Up @@ -166,13 +181,40 @@ where
);

let nulls = self.null_buffer_builder.finish();
let array_data = ArrayData::builder(DataType::FixedSizeList(
Arc::new(Field::new("item", values_data.data_type().clone(), true)),
self.list_len,
))
.len(len)
.add_child_data(values_data)
.nulls(nulls);

let field = match &self.field {
Some(f) => {
let size = self.value_length();
assert_eq!(
f.data_type(),
values_data.data_type(),
"DataType of field ({}) should be the same as the values_builder DataType ({})",
f.data_type(),
values_data.data_type()
);

if let Some(a) = values_arr.logical_nulls() {
let nulls_valid = f.is_nullable()
|| nulls
.as_ref()
.map(|n| n.expand(size as _).contains(&a))
.unwrap_or_default();

assert!(
nulls_valid,
"Found unmasked nulls for non-nullable FixedSizeListBuilder field {:?}",
f.name()
);
}
f.clone()
}
None => Arc::new(Field::new("item", values_data.data_type().clone(), true)),
};

let array_data = ArrayData::builder(DataType::FixedSizeList(field, self.list_len))
.len(len)
.add_child_data(values_data)
.nulls(nulls);

let array_data = unsafe { array_data.build_unchecked() };

Expand All @@ -194,13 +236,39 @@ where
);

let nulls = self.null_buffer_builder.finish_cloned();
let array_data = ArrayData::builder(DataType::FixedSizeList(
Arc::new(Field::new("item", values_data.data_type().clone(), true)),
self.list_len,
))
.len(len)
.add_child_data(values_data)
.nulls(nulls);

let field = match &self.field {
Some(f) => {
let size = self.value_length();
assert_eq!(
f.data_type(),
values_data.data_type(),
"DataType of field ({}) should be the same as the values_builder DataType ({})",
f.data_type(),
values_data.data_type()
);
if let Some(a) = values_arr.logical_nulls() {
let nulls_valid = f.is_nullable()
|| nulls
.as_ref()
.map(|n| n.expand(size as _).contains(&a))
.unwrap_or_default();

assert!(
nulls_valid,
"Found unmasked nulls for non-nullable FixedSizeListBuilder field {:?}",
f.name()
);
}
f.clone()
}
None => Arc::new(Field::new("item", values_data.data_type().clone(), true)),
};

let array_data = ArrayData::builder(DataType::FixedSizeList(field, self.list_len))
.len(len)
.add_child_data(values_data)
.nulls(nulls);

let array_data = unsafe { array_data.build_unchecked() };

Expand All @@ -216,28 +284,54 @@ mod tests {
use crate::Array;
use crate::Int32Array;

#[test]
fn test_fixed_size_list_array_builder() {
fn make_list_builder(
include_null_element: bool,
include_null_in_values: bool,
) -> FixedSizeListBuilder<crate::builder::PrimitiveBuilder<crate::types::Int32Type>> {
let values_builder = Int32Builder::new();
let mut builder = FixedSizeListBuilder::new(values_builder, 3);

// [[0, 1, 2], null, [3, null, 5], [6, 7, null]]
builder.values().append_value(0);
builder.values().append_value(1);
builder.values().append_value(2);
builder.append(true);
builder.values().append_null();
builder.values().append_null();
builder.values().append_null();
builder.append(false);

builder.values().append_value(2);
builder.values().append_value(3);
builder.values().append_null();
builder.values().append_value(5);
builder.append(true);
builder.values().append_value(6);
builder.values().append_value(7);
builder.values().append_null();
builder.values().append_value(4);
builder.append(true);

if include_null_element {
builder.values().append_null();
builder.values().append_null();
builder.values().append_null();
builder.append(false);
} else {
builder.values().append_value(2);
builder.values().append_value(3);
builder.values().append_value(4);
builder.append(true);
}

if include_null_in_values {
builder.values().append_value(3);
builder.values().append_null();
builder.values().append_value(5);
builder.append(true);
} else {
builder.values().append_value(3);
builder.values().append_value(4);
builder.values().append_value(5);
builder.append(true);
}

builder
}

#[test]
fn test_fixed_size_list_array_builder() {
let mut builder = make_list_builder(true, true);

let list_array = builder.finish();

assert_eq!(DataType::Int32, list_array.value_type());
Expand All @@ -248,9 +342,48 @@ mod tests {
}

#[test]
fn test_fixed_size_list_array_builder_finish_cloned() {
fn test_fixed_size_list_array_builder_with_field() {
let builder = make_list_builder(false, false);
let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false));
let list_array = builder.finish();

assert_eq!(DataType::Int32, list_array.value_type());
assert_eq!(4, list_array.len());
assert_eq!(0, list_array.null_count());
assert_eq!(6, list_array.value_offset(2));
assert_eq!(3, list_array.value_length());
}

#[test]
fn test_fixed_size_list_array_builder_with_field_and_null() {
let builder = make_list_builder(true, false);
let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false));
let list_array = builder.finish();

assert_eq!(DataType::Int32, list_array.value_type());
assert_eq!(4, list_array.len());
assert_eq!(1, list_array.null_count());
assert_eq!(6, list_array.value_offset(2));
assert_eq!(3, list_array.value_length());
}

#[test]
#[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListBuilder field")]
fn test_fixed_size_list_array_builder_with_field_null_panic() {
let builder = make_list_builder(true, true);
let mut builder = builder.with_field(Field::new("list_item", DataType::Int32, false));

builder.finish();
}

#[test]
#[should_panic(
expected = "DataType of field (Int64) should be the same as the values_builder DataType (Int32)"
)]
fn test_fixed_size_list_array_builder_with_field_type_panic() {
let values_builder = Int32Builder::new();
let mut builder = FixedSizeListBuilder::new(values_builder, 3);
let builder = FixedSizeListBuilder::new(values_builder, 3);
let mut builder = builder.with_field(Field::new("list_item", DataType::Int64, true));

// [[0, 1, 2], null, [3, null, 5], [6, 7, null]]
builder.values().append_value(0);
Expand All @@ -262,13 +395,68 @@ mod tests {
builder.values().append_null();
builder.append(false);
builder.values().append_value(3);
builder.values().append_null();
builder.values().append_value(4);
builder.values().append_value(5);
builder.append(true);

builder.finish();
}

#[test]
fn test_fixed_size_list_array_builder_cloned_with_field() {
let builder = make_list_builder(true, true);
let builder = builder.with_field(Field::new("list_element", DataType::Int32, true));

let list_array = builder.finish_cloned();

assert_eq!(DataType::Int32, list_array.value_type());
assert_eq!(4, list_array.len());
assert_eq!(1, list_array.null_count());
assert_eq!(6, list_array.value_offset(2));
assert_eq!(3, list_array.value_length());
}

#[test]
#[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListBuilder field")]
fn test_fixed_size_list_array_builder_cloned_with_field_null_panic() {
let builder = make_list_builder(true, true);
let builder = builder.with_field(Field::new("list_item", DataType::Int32, false));

builder.finish_cloned();
}

#[test]
fn test_fixed_size_list_array_builder_cloned_with_field_and_null() {
let builder = make_list_builder(true, false);
let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false));
let list_array = builder.finish();

assert_eq!(DataType::Int32, list_array.value_type());
assert_eq!(4, list_array.len());
assert_eq!(1, list_array.null_count());
assert_eq!(6, list_array.value_offset(2));
assert_eq!(3, list_array.value_length());
}

#[test]
#[should_panic(
expected = "DataType of field (Int64) should be the same as the values_builder DataType (Int32)"
)]
fn test_fixed_size_list_array_builder_cloned_with_field_type_panic() {
let builder = make_list_builder(false, false);
let builder = builder.with_field(Field::new("list_item", DataType::Int64, true));

builder.finish_cloned();
}

#[test]
fn test_fixed_size_list_array_builder_finish_cloned() {
let mut builder = make_list_builder(true, true);

let mut list_array = builder.finish_cloned();

assert_eq!(DataType::Int32, list_array.value_type());
assert_eq!(3, list_array.len());
assert_eq!(4, list_array.len());
assert_eq!(1, list_array.null_count());
assert_eq!(3, list_array.value_length());

Expand All @@ -283,12 +471,40 @@ mod tests {
list_array = builder.finish();

assert_eq!(DataType::Int32, list_array.value_type());
assert_eq!(5, list_array.len());
assert_eq!(6, list_array.len());
assert_eq!(2, list_array.null_count());
assert_eq!(6, list_array.value_offset(2));
assert_eq!(3, list_array.value_length());
}

#[test]
fn test_fixed_size_list_array_builder_with_field_empty() {
let values_builder = Int32Array::builder(0);
let mut builder = FixedSizeListBuilder::new(values_builder, 3).with_field(Field::new(
"list_item",
DataType::Int32,
false,
));
assert!(builder.is_empty());
let arr = builder.finish();
assert_eq!(0, arr.len());
assert_eq!(0, builder.len());
}

#[test]
fn test_fixed_size_list_array_builder_cloned_with_field_empty() {
let values_builder = Int32Array::builder(0);
let builder = FixedSizeListBuilder::new(values_builder, 3).with_field(Field::new(
"list_item",
DataType::Int32,
false,
));
assert!(builder.is_empty());
let arr = builder.finish_cloned();
assert_eq!(0, arr.len());
assert_eq!(0, builder.len());
}

#[test]
fn test_fixed_size_list_array_builder_empty() {
let values_builder = Int32Array::builder(5);
Expand Down

0 comments on commit 1b3d1a9

Please sign in to comment.