From 999c7468e6f0c03ae45fa47a863ea478c6f2abb7 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Tue, 9 Apr 2024 10:09:07 +0100 Subject: [PATCH 1/3] Use FixedSizeListArray::new in FixedSizeListBuilder --- .../src/builder/fixed_size_list_builder.rs | 114 ++++-------------- 1 file changed, 22 insertions(+), 92 deletions(-) diff --git a/arrow-array/src/builder/fixed_size_list_builder.rs b/arrow-array/src/builder/fixed_size_list_builder.rs index 3d79a763e9d8..ef1183ed5c93 100644 --- a/arrow-array/src/builder/fixed_size_list_builder.rs +++ b/arrow-array/src/builder/fixed_size_list_builder.rs @@ -18,7 +18,6 @@ use crate::builder::ArrayBuilder; use crate::{ArrayRef, FixedSizeListArray}; use arrow_buffer::NullBufferBuilder; -use arrow_data::ArrayData; use arrow_schema::{DataType, Field, FieldRef}; use std::any::Any; use std::sync::Arc; @@ -169,110 +168,45 @@ where /// Builds the [`FixedSizeListBuilder`] and reset this builder. pub fn finish(&mut self) -> FixedSizeListArray { let len = self.len(); - let values_arr = self.values_builder.finish(); - let values_data = values_arr.to_data(); + let values = self.values_builder.finish(); + let nulls = self.null_buffer_builder.finish(); assert_eq!( - values_data.len(), len * self.list_len as usize, + values.len(), len * self.list_len as usize, "Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).", - values_data.len(), + values.len(), self.list_len, len, ); - let nulls = self.null_buffer_builder.finish(); + let field = self + .field + .clone() + .unwrap_or_else(|| Arc::new(Field::new("item", values.data_type().clone(), true))); - let field = match &self.field { - Some(f) => { - let size = self.value_length(); - assert_eq!( - f.data_type(), - values_data.data_type(), - "DataType of field ({}) should be the same as the values_builder DataType ({})", - f.data_type(), - values_data.data_type() - ); - - if let Some(a) = values_arr.logical_nulls() { - let nulls_valid = f.is_nullable() - || nulls - .as_ref() - .map(|n| n.expand(size as _).contains(&a)) - .unwrap_or_default(); - - assert!( - nulls_valid, - "Found unmasked nulls for non-nullable FixedSizeListBuilder field {:?}", - f.name() - ); - } - f.clone() - } - None => Arc::new(Field::new("item", values_data.data_type().clone(), true)), - }; - - let array_data = ArrayData::builder(DataType::FixedSizeList(field, self.list_len)) - .len(len) - .add_child_data(values_data) - .nulls(nulls); - - let array_data = unsafe { array_data.build_unchecked() }; - - FixedSizeListArray::from(array_data) + FixedSizeListArray::new(field, self.list_len, values, nulls) } /// Builds the [`FixedSizeListBuilder`] without resetting the builder. pub fn finish_cloned(&self) -> FixedSizeListArray { let len = self.len(); - let values_arr = self.values_builder.finish_cloned(); - let values_data = values_arr.to_data(); + let values = self.values_builder.finish_cloned(); + let nulls = self.null_buffer_builder.finish_cloned(); assert_eq!( - values_data.len(), len * self.list_len as usize, + values.len(), len * self.list_len as usize, "Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).", - values_data.len(), + values.len(), self.list_len, len, ); - let nulls = self.null_buffer_builder.finish_cloned(); + let field = self + .field + .clone() + .unwrap_or_else(|| Arc::new(Field::new("item", values.data_type().clone(), true))); - let field = match &self.field { - Some(f) => { - let size = self.value_length(); - assert_eq!( - f.data_type(), - values_data.data_type(), - "DataType of field ({}) should be the same as the values_builder DataType ({})", - f.data_type(), - values_data.data_type() - ); - if let Some(a) = values_arr.logical_nulls() { - let nulls_valid = f.is_nullable() - || nulls - .as_ref() - .map(|n| n.expand(size as _).contains(&a)) - .unwrap_or_default(); - - assert!( - nulls_valid, - "Found unmasked nulls for non-nullable FixedSizeListBuilder field {:?}", - f.name() - ); - } - f.clone() - } - None => Arc::new(Field::new("item", values_data.data_type().clone(), true)), - }; - - let array_data = ArrayData::builder(DataType::FixedSizeList(field, self.list_len)) - .len(len) - .add_child_data(values_data) - .nulls(nulls); - - let array_data = unsafe { array_data.build_unchecked() }; - - FixedSizeListArray::from(array_data) + FixedSizeListArray::new(field, self.list_len, values, nulls) } } @@ -368,7 +302,7 @@ mod tests { } #[test] - #[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListBuilder field")] + #[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListArray")] fn test_fixed_size_list_array_builder_with_field_null_panic() { let builder = make_list_builder(true, true); let mut builder = builder.with_field(Field::new("list_item", DataType::Int32, false)); @@ -377,9 +311,7 @@ mod tests { } #[test] - #[should_panic( - expected = "DataType of field (Int64) should be the same as the values_builder DataType (Int32)" - )] + #[should_panic(expected = "FixedSizeListArray expected data type Int64 got Int32")] fn test_fixed_size_list_array_builder_with_field_type_panic() { let values_builder = Int32Builder::new(); let builder = FixedSizeListBuilder::new(values_builder, 3); @@ -417,7 +349,7 @@ mod tests { } #[test] - #[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListBuilder field")] + #[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListArray")] fn test_fixed_size_list_array_builder_cloned_with_field_null_panic() { let builder = make_list_builder(true, true); let builder = builder.with_field(Field::new("list_item", DataType::Int32, false)); @@ -439,9 +371,7 @@ mod tests { } #[test] - #[should_panic( - expected = "DataType of field (Int64) should be the same as the values_builder DataType (Int32)" - )] + #[should_panic(expected = "FixedSizeListArray expected data type Int64 got Int32")] fn test_fixed_size_list_array_builder_cloned_with_field_type_panic() { let builder = make_list_builder(false, false); let builder = builder.with_field(Field::new("list_item", DataType::Int64, true)); From bfc0c6b244d8a7210ff6c037da5f02518e98ecdd Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Tue, 9 Apr 2024 10:41:16 +0100 Subject: [PATCH 2/3] Fix handling of entirely null zero-sized list array (#5614) --- .../src/array/fixed_size_list_array.rs | 35 +++++++++++++------ .../src/builder/fixed_size_list_builder.rs | 7 ++-- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/arrow-array/src/array/fixed_size_list_array.rs b/arrow-array/src/array/fixed_size_list_array.rs index f8f01516e3d4..fdd31c94dc2e 100644 --- a/arrow-array/src/array/fixed_size_list_array.rs +++ b/arrow-array/src/array/fixed_size_list_array.rs @@ -152,16 +152,22 @@ impl FixedSizeListArray { ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {}", size)) })?; - let len = values.len() / s.max(1); - if let Some(n) = nulls.as_ref() { - if n.len() != len { - return Err(ArrowError::InvalidArgumentError(format!( - "Incorrect length of null buffer for FixedSizeListArray, expected {} got {}", - len, - n.len(), - ))); + let len = match s { + 0 => nulls.as_ref().map(|x| x.len()).unwrap_or_default(), + _ => { + let len = values.len() / s.max(1); + if let Some(n) = nulls.as_ref() { + if n.len() != len { + return Err(ArrowError::InvalidArgumentError(format!( + "Incorrect length of null buffer for FixedSizeListArray, expected {} got {}", + len, + n.len(), + ))); + } + } + len } - } + }; if field.data_type() != values.data_type() { return Err(ArrowError::InvalidArgumentError(format!( @@ -460,7 +466,7 @@ mod tests { use crate::cast::AsArray; use crate::types::Int32Type; - use crate::Int32Array; + use crate::{new_empty_array, Int32Array}; use super::*; @@ -674,4 +680,13 @@ mod tests { let err = FixedSizeListArray::try_new(field, 2, values, None).unwrap_err(); assert_eq!(err.to_string(), "Invalid argument error: FixedSizeListArray expected data type Int64 got Int32 for \"item\""); } + + #[test] + fn empty_fixed_size_list() { + let field = Arc::new(Field::new("item", DataType::Int32, true)); + let nulls = NullBuffer::new_null(2); + let values = new_empty_array(&DataType::Int32); + let list = FixedSizeListArray::new(field.clone(), 0, values, Some(nulls)); + assert_eq!(list.len(), 2); + } } diff --git a/arrow-array/src/builder/fixed_size_list_builder.rs b/arrow-array/src/builder/fixed_size_list_builder.rs index ef1183ed5c93..f65947bfff32 100644 --- a/arrow-array/src/builder/fixed_size_list_builder.rs +++ b/arrow-array/src/builder/fixed_size_list_builder.rs @@ -18,7 +18,7 @@ use crate::builder::ArrayBuilder; use crate::{ArrayRef, FixedSizeListArray}; use arrow_buffer::NullBufferBuilder; -use arrow_schema::{DataType, Field, FieldRef}; +use arrow_schema::{Field, FieldRef}; use std::any::Any; use std::sync::Arc; @@ -93,9 +93,9 @@ impl FixedSizeListBuilder { } } - /// Override the field passed to [`ArrayData::builder`] + /// Override the field passed to [`FixedSizeListArray::new`] /// - /// By default a nullable field is created with the name `item` + /// By default, a nullable field is created with the name `item` /// /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the /// field's data type does not match that of `T` @@ -213,6 +213,7 @@ where #[cfg(test)] mod tests { use super::*; + use arrow_schema::DataType; use crate::builder::Int32Builder; use crate::Array; From 7fa6e17b320fdbdc6fc9439f0b56d32be6e52f73 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Tue, 9 Apr 2024 10:55:05 +0100 Subject: [PATCH 3/3] Fix --- arrow-array/src/array/fixed_size_list_array.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-array/src/array/fixed_size_list_array.rs b/arrow-array/src/array/fixed_size_list_array.rs index fdd31c94dc2e..1d07daf4cc75 100644 --- a/arrow-array/src/array/fixed_size_list_array.rs +++ b/arrow-array/src/array/fixed_size_list_array.rs @@ -662,7 +662,7 @@ mod tests { ); let list = FixedSizeListArray::new(field.clone(), 0, values.clone(), None); - assert_eq!(list.len(), 6); + assert_eq!(list.len(), 0); let nulls = NullBuffer::new_null(2); let err = FixedSizeListArray::try_new(field, 2, values.clone(), Some(nulls)).unwrap_err();