-
Notifications
You must be signed in to change notification settings - Fork 819
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: implemented with_field() for FixedSizeListBuilder #5541
Changes from all commits
d1bc1b0
83605bb
09741dd
06a8cec
052fd66
ac768f6
355c941
1999530
ed48b9d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,7 @@ use crate::builder::ArrayBuilder; | |
use crate::{ArrayRef, FixedSizeListArray}; | ||
use arrow_buffer::NullBufferBuilder; | ||
use arrow_data::ArrayData; | ||
use arrow_schema::{DataType, Field}; | ||
use arrow_schema::{DataType, Field, FieldRef}; | ||
use std::any::Any; | ||
use std::sync::Arc; | ||
|
||
|
@@ -67,6 +67,7 @@ pub struct FixedSizeListBuilder<T: ArrayBuilder> { | |
null_buffer_builder: NullBufferBuilder, | ||
values_builder: T, | ||
list_len: i32, | ||
field: Option<FieldRef>, | ||
} | ||
|
||
impl<T: ArrayBuilder> FixedSizeListBuilder<T> { | ||
|
@@ -89,6 +90,20 @@ impl<T: ArrayBuilder> FixedSizeListBuilder<T> { | |
null_buffer_builder: NullBufferBuilder::new(capacity), | ||
values_builder, | ||
list_len: value_length, | ||
field: None, | ||
} | ||
} | ||
|
||
/// Override the field passed to [`ArrayData::builder`] | ||
/// | ||
/// By default a nullable field is created with the name `item` | ||
/// | ||
/// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the | ||
/// field's data type does not match that of `T` | ||
pub fn with_field(self, field: impl Into<FieldRef>) -> Self { | ||
Self { | ||
field: Some(field.into()), | ||
..self | ||
} | ||
} | ||
} | ||
|
@@ -166,13 +181,40 @@ where | |
); | ||
|
||
let nulls = self.null_buffer_builder.finish(); | ||
let array_data = ArrayData::builder(DataType::FixedSizeList( | ||
Arc::new(Field::new("item", values_data.data_type().clone(), true)), | ||
self.list_len, | ||
)) | ||
.len(len) | ||
.add_child_data(values_data) | ||
.nulls(nulls); | ||
|
||
let field = match &self.field { | ||
Some(f) => { | ||
let size = self.value_length(); | ||
assert_eq!( | ||
f.data_type(), | ||
values_data.data_type(), | ||
"DataType of field ({}) should be the same as the values_builder DataType ({})", | ||
f.data_type(), | ||
values_data.data_type() | ||
); | ||
|
||
if let Some(a) = values_arr.logical_nulls() { | ||
let nulls_valid = f.is_nullable() | ||
|| nulls | ||
.as_ref() | ||
.map(|n| n.expand(size as _).contains(&a)) | ||
.unwrap_or_default(); | ||
|
||
assert!( | ||
nulls_valid, | ||
"Found unmasked nulls for non-nullable FixedSizeListBuilder field {:?}", | ||
f.name() | ||
); | ||
} | ||
f.clone() | ||
} | ||
None => Arc::new(Field::new("item", values_data.data_type().clone(), true)), | ||
}; | ||
|
||
let array_data = ArrayData::builder(DataType::FixedSizeList(field, self.list_len)) | ||
.len(len) | ||
.add_child_data(values_data) | ||
.nulls(nulls); | ||
|
||
let array_data = unsafe { array_data.build_unchecked() }; | ||
|
||
|
@@ -194,13 +236,39 @@ where | |
); | ||
|
||
let nulls = self.null_buffer_builder.finish_cloned(); | ||
let array_data = ArrayData::builder(DataType::FixedSizeList( | ||
Arc::new(Field::new("item", values_data.data_type().clone(), true)), | ||
self.list_len, | ||
)) | ||
.len(len) | ||
.add_child_data(values_data) | ||
.nulls(nulls); | ||
|
||
let field = match &self.field { | ||
Some(f) => { | ||
let size = self.value_length(); | ||
assert_eq!( | ||
f.data_type(), | ||
values_data.data_type(), | ||
"DataType of field ({}) should be the same as the values_builder DataType ({})", | ||
f.data_type(), | ||
values_data.data_type() | ||
); | ||
if let Some(a) = values_arr.logical_nulls() { | ||
let nulls_valid = f.is_nullable() | ||
|| nulls | ||
.as_ref() | ||
.map(|n| n.expand(size as _).contains(&a)) | ||
.unwrap_or_default(); | ||
|
||
assert!( | ||
nulls_valid, | ||
"Found unmasked nulls for non-nullable FixedSizeListBuilder field {:?}", | ||
f.name() | ||
); | ||
} | ||
f.clone() | ||
} | ||
None => Arc::new(Field::new("item", values_data.data_type().clone(), true)), | ||
}; | ||
|
||
let array_data = ArrayData::builder(DataType::FixedSizeList(field, self.list_len)) | ||
.len(len) | ||
.add_child_data(values_data) | ||
.nulls(nulls); | ||
|
||
let array_data = unsafe { array_data.build_unchecked() }; | ||
alamb marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
|
@@ -216,28 +284,54 @@ mod tests { | |
use crate::Array; | ||
use crate::Int32Array; | ||
|
||
#[test] | ||
fn test_fixed_size_list_array_builder() { | ||
fn make_list_builder( | ||
include_null_element: bool, | ||
include_null_in_values: bool, | ||
) -> FixedSizeListBuilder<crate::builder::PrimitiveBuilder<crate::types::Int32Type>> { | ||
let values_builder = Int32Builder::new(); | ||
let mut builder = FixedSizeListBuilder::new(values_builder, 3); | ||
|
||
// [[0, 1, 2], null, [3, null, 5], [6, 7, null]] | ||
builder.values().append_value(0); | ||
builder.values().append_value(1); | ||
builder.values().append_value(2); | ||
builder.append(true); | ||
builder.values().append_null(); | ||
builder.values().append_null(); | ||
builder.values().append_null(); | ||
builder.append(false); | ||
|
||
builder.values().append_value(2); | ||
builder.values().append_value(3); | ||
builder.values().append_null(); | ||
builder.values().append_value(5); | ||
builder.append(true); | ||
builder.values().append_value(6); | ||
builder.values().append_value(7); | ||
builder.values().append_null(); | ||
builder.values().append_value(4); | ||
builder.append(true); | ||
|
||
if include_null_element { | ||
builder.values().append_null(); | ||
builder.values().append_null(); | ||
builder.values().append_null(); | ||
builder.append(false); | ||
} else { | ||
builder.values().append_value(2); | ||
builder.values().append_value(3); | ||
builder.values().append_value(4); | ||
builder.append(true); | ||
} | ||
|
||
if include_null_in_values { | ||
builder.values().append_value(3); | ||
builder.values().append_null(); | ||
builder.values().append_value(5); | ||
builder.append(true); | ||
} else { | ||
builder.values().append_value(3); | ||
builder.values().append_value(4); | ||
builder.values().append_value(5); | ||
builder.append(true); | ||
} | ||
|
||
builder | ||
} | ||
|
||
#[test] | ||
fn test_fixed_size_list_array_builder() { | ||
let mut builder = make_list_builder(true, true); | ||
|
||
let list_array = builder.finish(); | ||
|
||
assert_eq!(DataType::Int32, list_array.value_type()); | ||
|
@@ -248,9 +342,48 @@ mod tests { | |
} | ||
|
||
#[test] | ||
fn test_fixed_size_list_array_builder_finish_cloned() { | ||
fn test_fixed_size_list_array_builder_with_field() { | ||
let builder = make_list_builder(false, false); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @alamb , I had a question about this test in particular. With the new capability to set the field as a non-nullable we have the situation where the builder has a null element built like this: builder.values().append_null();
builder.values().append_null();
builder.values().append_null();
builder.append(false); If the field is set to non-nullable, this element will fail the null check. I wanted to check with you if this is logically right or not? Are we making a distinction between nulls in the values builder, where let's say 1 of the 3 elements of a particular fixed size list is null (which would be a valid fail on non-nullability) or nulls in the main builder (which is built up as 3 nulls + a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The logic we apply for FixedSizeList and StructArray elsewhere is to permit "masked" nulls, this is because a null must always take up space. I am not sure if we want to replicate this logic here. See https://docs.rs/arrow-array/latest/arrow_array/array/struct.FixedSizeListArray.html#method.try_new There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I think I would expect it to be consistent with whatever |
||
let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false)); | ||
let list_array = builder.finish(); | ||
|
||
assert_eq!(DataType::Int32, list_array.value_type()); | ||
assert_eq!(4, list_array.len()); | ||
assert_eq!(0, list_array.null_count()); | ||
assert_eq!(6, list_array.value_offset(2)); | ||
assert_eq!(3, list_array.value_length()); | ||
} | ||
|
||
#[test] | ||
fn test_fixed_size_list_array_builder_with_field_and_null() { | ||
let builder = make_list_builder(true, false); | ||
let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false)); | ||
let list_array = builder.finish(); | ||
alamb marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
assert_eq!(DataType::Int32, list_array.value_type()); | ||
assert_eq!(4, list_array.len()); | ||
assert_eq!(1, list_array.null_count()); | ||
assert_eq!(6, list_array.value_offset(2)); | ||
assert_eq!(3, list_array.value_length()); | ||
} | ||
|
||
#[test] | ||
#[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListBuilder field")] | ||
fn test_fixed_size_list_array_builder_with_field_null_panic() { | ||
let builder = make_list_builder(true, true); | ||
let mut builder = builder.with_field(Field::new("list_item", DataType::Int32, false)); | ||
|
||
builder.finish(); | ||
} | ||
|
||
#[test] | ||
#[should_panic( | ||
expected = "DataType of field (Int64) should be the same as the values_builder DataType (Int32)" | ||
)] | ||
fn test_fixed_size_list_array_builder_with_field_type_panic() { | ||
let values_builder = Int32Builder::new(); | ||
let mut builder = FixedSizeListBuilder::new(values_builder, 3); | ||
let builder = FixedSizeListBuilder::new(values_builder, 3); | ||
let mut builder = builder.with_field(Field::new("list_item", DataType::Int64, true)); | ||
|
||
// [[0, 1, 2], null, [3, null, 5], [6, 7, null]] | ||
builder.values().append_value(0); | ||
|
@@ -262,13 +395,68 @@ mod tests { | |
builder.values().append_null(); | ||
builder.append(false); | ||
builder.values().append_value(3); | ||
builder.values().append_null(); | ||
builder.values().append_value(4); | ||
builder.values().append_value(5); | ||
builder.append(true); | ||
|
||
builder.finish(); | ||
} | ||
|
||
#[test] | ||
fn test_fixed_size_list_array_builder_cloned_with_field() { | ||
let builder = make_list_builder(true, true); | ||
let builder = builder.with_field(Field::new("list_element", DataType::Int32, true)); | ||
|
||
let list_array = builder.finish_cloned(); | ||
|
||
assert_eq!(DataType::Int32, list_array.value_type()); | ||
assert_eq!(4, list_array.len()); | ||
assert_eq!(1, list_array.null_count()); | ||
assert_eq!(6, list_array.value_offset(2)); | ||
assert_eq!(3, list_array.value_length()); | ||
} | ||
|
||
#[test] | ||
#[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListBuilder field")] | ||
fn test_fixed_size_list_array_builder_cloned_with_field_null_panic() { | ||
let builder = make_list_builder(true, true); | ||
let builder = builder.with_field(Field::new("list_item", DataType::Int32, false)); | ||
|
||
builder.finish_cloned(); | ||
} | ||
|
||
#[test] | ||
fn test_fixed_size_list_array_builder_cloned_with_field_and_null() { | ||
let builder = make_list_builder(true, false); | ||
let mut builder = builder.with_field(Field::new("list_element", DataType::Int32, false)); | ||
let list_array = builder.finish(); | ||
|
||
assert_eq!(DataType::Int32, list_array.value_type()); | ||
assert_eq!(4, list_array.len()); | ||
assert_eq!(1, list_array.null_count()); | ||
assert_eq!(6, list_array.value_offset(2)); | ||
assert_eq!(3, list_array.value_length()); | ||
} | ||
|
||
#[test] | ||
#[should_panic( | ||
expected = "DataType of field (Int64) should be the same as the values_builder DataType (Int32)" | ||
)] | ||
fn test_fixed_size_list_array_builder_cloned_with_field_type_panic() { | ||
let builder = make_list_builder(false, false); | ||
let builder = builder.with_field(Field::new("list_item", DataType::Int64, true)); | ||
|
||
builder.finish_cloned(); | ||
} | ||
|
||
#[test] | ||
fn test_fixed_size_list_array_builder_finish_cloned() { | ||
let mut builder = make_list_builder(true, true); | ||
|
||
let mut list_array = builder.finish_cloned(); | ||
|
||
assert_eq!(DataType::Int32, list_array.value_type()); | ||
assert_eq!(3, list_array.len()); | ||
assert_eq!(4, list_array.len()); | ||
assert_eq!(1, list_array.null_count()); | ||
assert_eq!(3, list_array.value_length()); | ||
|
||
|
@@ -283,12 +471,40 @@ mod tests { | |
list_array = builder.finish(); | ||
|
||
assert_eq!(DataType::Int32, list_array.value_type()); | ||
assert_eq!(5, list_array.len()); | ||
assert_eq!(6, list_array.len()); | ||
assert_eq!(2, list_array.null_count()); | ||
assert_eq!(6, list_array.value_offset(2)); | ||
assert_eq!(3, list_array.value_length()); | ||
} | ||
|
||
#[test] | ||
fn test_fixed_size_list_array_builder_with_field_empty() { | ||
let values_builder = Int32Array::builder(0); | ||
let mut builder = FixedSizeListBuilder::new(values_builder, 3).with_field(Field::new( | ||
"list_item", | ||
DataType::Int32, | ||
false, | ||
)); | ||
assert!(builder.is_empty()); | ||
let arr = builder.finish(); | ||
assert_eq!(0, arr.len()); | ||
assert_eq!(0, builder.len()); | ||
} | ||
|
||
#[test] | ||
fn test_fixed_size_list_array_builder_cloned_with_field_empty() { | ||
let values_builder = Int32Array::builder(0); | ||
let builder = FixedSizeListBuilder::new(values_builder, 3).with_field(Field::new( | ||
"list_item", | ||
DataType::Int32, | ||
false, | ||
)); | ||
assert!(builder.is_empty()); | ||
let arr = builder.finish_cloned(); | ||
assert_eq!(0, arr.len()); | ||
assert_eq!(0, builder.len()); | ||
} | ||
|
||
#[test] | ||
fn test_fixed_size_list_array_builder_empty() { | ||
let values_builder = Int32Array::builder(5); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍