From 1187b9a1d13ddf7224f7dbcf4e785e825dd1c5a6 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Wed, 6 Jul 2022 17:20:51 +0800 Subject: [PATCH 1/3] refine the code Signed-off-by: remzi <13716567376yh@gmail.com> --- .../builder/fixed_size_binary_builder.rs | 109 ++++++++++++++---- 1 file changed, 89 insertions(+), 20 deletions(-) diff --git a/arrow/src/array/builder/fixed_size_binary_builder.rs b/arrow/src/array/builder/fixed_size_binary_builder.rs index 2bbe4bb05100..8cfddd456261 100644 --- a/arrow/src/array/builder/fixed_size_binary_builder.rs +++ b/arrow/src/array/builder/fixed_size_binary_builder.rs @@ -16,24 +16,39 @@ // under the License. use crate::array::{ - ArrayBuilder, ArrayRef, FixedSizeBinaryArray, FixedSizeListBuilder, UInt8Builder, + ArrayBuilder, ArrayData, ArrayRef, FixedSizeBinaryArray, UInt8BufferBuilder, }; +use crate::datatypes::DataType; use crate::error::{ArrowError, Result}; use std::any::Any; use std::sync::Arc; +use super::BooleanBufferBuilder; + #[derive(Debug)] pub struct FixedSizeBinaryBuilder { - builder: FixedSizeListBuilder, + values_builder: UInt8BufferBuilder, + bitmap_builder: BooleanBufferBuilder, + value_length: i32, } impl FixedSizeBinaryBuilder { /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values /// array pub fn new(capacity: usize, byte_width: i32) -> Self { - let values_builder = UInt8Builder::new(capacity); + assert!( + byte_width >= 0, + "value length ({}) of the array must >= 0", + byte_width + ); Self { - builder: FixedSizeListBuilder::new(values_builder, byte_width), + values_builder: UInt8BufferBuilder::new(capacity), + bitmap_builder: BooleanBufferBuilder::new(if byte_width > 0 { + capacity / byte_width as usize + } else { + 0 + }), + value_length: byte_width, } } @@ -43,26 +58,36 @@ impl FixedSizeBinaryBuilder { /// distinct array element. #[inline] pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> { - if self.builder.value_length() != value.as_ref().len() as i32 { - return Err(ArrowError::InvalidArgumentError( + if self.value_length != value.as_ref().len() as i32 { + Err(ArrowError::InvalidArgumentError( "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths".to_string() - )); + )) + } else { + self.values_builder.append_slice(value.as_ref()); + self.bitmap_builder.append(true); + Ok(()) } - self.builder.values().append_slice(value.as_ref())?; - self.builder.append(true) } /// Append a null value to the array. #[inline] pub fn append_null(&mut self) -> Result<()> { - let length: usize = self.builder.value_length() as usize; - self.builder.values().append_slice(&vec![0u8; length][..])?; - self.builder.append(false) + self.values_builder + .append_slice(&vec![0u8; self.value_length as usize][..]); + self.bitmap_builder.append(false); + Ok(()) } /// Builds the `FixedSizeBinaryArray` and reset this builder. pub fn finish(&mut self) -> FixedSizeBinaryArray { - FixedSizeBinaryArray::from(self.builder.finish()) + let array_length = self.len(); + let array_data_builder = + ArrayData::builder(DataType::FixedSizeBinary(self.value_length)) + .add_buffer(self.values_builder.finish()) + .null_bit_buffer(Some(self.bitmap_builder.finish())) + .len(array_length); + let array_data = unsafe { array_data_builder.build_unchecked() }; + FixedSizeBinaryArray::from(array_data) } } @@ -84,12 +109,12 @@ impl ArrayBuilder for FixedSizeBinaryBuilder { /// Returns the number of array slots in the builder fn len(&self) -> usize { - self.builder.len() + self.bitmap_builder.len() } /// Returns whether the number of array slots is zero fn is_empty(&self) -> bool { - self.builder.is_empty() + self.bitmap_builder.is_empty() } /// Builds the array and reset this builder. @@ -114,15 +139,59 @@ mod tests { builder.append_value(b"hello").unwrap(); builder.append_null().unwrap(); builder.append_value(b"arrow").unwrap(); - let fixed_size_binary_array: FixedSizeBinaryArray = builder.finish(); + let array: FixedSizeBinaryArray = builder.finish(); + + assert_eq!(&DataType::FixedSizeBinary(5), array.data_type()); + assert_eq!(3, array.len()); + assert_eq!(1, array.null_count()); + assert_eq!(10, array.value_offset(2)); + assert_eq!(5, array.value_length()); + } + + #[test] + fn test_fixed_size_binary_builder_with_zero_value_length() { + let mut builder = FixedSizeBinaryBuilder::new(0, 0); + builder.append_value(b"").unwrap(); + builder.append_null().unwrap(); + builder.append_value(b"").unwrap(); + assert!(!builder.is_empty()); + + let array: FixedSizeBinaryArray = builder.finish(); + assert_eq!(&DataType::FixedSizeBinary(0), array.data_type()); + assert_eq!(3, array.len()); + assert_eq!(1, array.null_count()); + assert_eq!(0, array.value_offset(2)); + assert_eq!(0, array.value_length()); + assert_eq!(b"", array.value(0)); + assert_eq!(b"", array.value(2)); + } + + #[test] + #[should_panic( + expected = "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths" + )] + fn test_fixed_size_binary_builder_with_inconsistent_value_length() { + let mut builder = FixedSizeBinaryBuilder::new(15, 4); + // [b"hello", null, "arrow"] + builder.append_value(b"hello").unwrap(); + } + #[test] + fn test_fixed_size_binary_builder_empty() { + let mut builder = FixedSizeBinaryBuilder::new(15, 5); + assert!(builder.is_empty()); + + let fixed_size_binary_array = builder.finish(); assert_eq!( &DataType::FixedSizeBinary(5), fixed_size_binary_array.data_type() ); - assert_eq!(3, fixed_size_binary_array.len()); - assert_eq!(1, fixed_size_binary_array.null_count()); - assert_eq!(10, fixed_size_binary_array.value_offset(2)); - assert_eq!(5, fixed_size_binary_array.value_length()); + assert_eq!(0, fixed_size_binary_array.len()); + } + + #[test] + #[should_panic(expected = "value length (-1) of the array must >= 0")] + fn test_fixed_size_binary_builder_invalid_value_length() { + let _ = FixedSizeBinaryBuilder::new(15, -1); } } From ffce7c3464c3ecfe17b39134373188a3c7f73037 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Wed, 6 Jul 2022 17:25:18 +0800 Subject: [PATCH 2/3] refine the docs Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/array/builder/fixed_size_binary_builder.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arrow/src/array/builder/fixed_size_binary_builder.rs b/arrow/src/array/builder/fixed_size_binary_builder.rs index 8cfddd456261..f8b6d99665d3 100644 --- a/arrow/src/array/builder/fixed_size_binary_builder.rs +++ b/arrow/src/array/builder/fixed_size_binary_builder.rs @@ -33,8 +33,8 @@ pub struct FixedSizeBinaryBuilder { } impl FixedSizeBinaryBuilder { - /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values - /// array + /// Creates a new [`FixedSizeBinaryBuilder`], `capacity` is the number of bytes in the values + /// buffer pub fn new(capacity: usize, byte_width: i32) -> Self { assert!( byte_width >= 0, @@ -54,8 +54,8 @@ impl FixedSizeBinaryBuilder { /// Appends a byte slice into the builder. /// - /// Automatically calls the `append` method to delimit the slice appended in as a - /// distinct array element. + /// Automatically update the null buffer to delimit the slice appended in as a + /// distinct value element. #[inline] pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> { if self.value_length != value.as_ref().len() as i32 { @@ -78,7 +78,7 @@ impl FixedSizeBinaryBuilder { Ok(()) } - /// Builds the `FixedSizeBinaryArray` and reset this builder. + /// Builds the [`FixedSizeBinaryArray`] and reset this builder. pub fn finish(&mut self) -> FixedSizeBinaryArray { let array_length = self.len(); let array_data_builder = From e281857680462beb4e63f0ebf9ecc9a194d51ce4 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Wed, 6 Jul 2022 17:30:54 +0800 Subject: [PATCH 3/3] nit Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/array/builder/fixed_size_binary_builder.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/arrow/src/array/builder/fixed_size_binary_builder.rs b/arrow/src/array/builder/fixed_size_binary_builder.rs index f8b6d99665d3..e62aa8fa60cf 100644 --- a/arrow/src/array/builder/fixed_size_binary_builder.rs +++ b/arrow/src/array/builder/fixed_size_binary_builder.rs @@ -173,7 +173,6 @@ mod tests { )] fn test_fixed_size_binary_builder_with_inconsistent_value_length() { let mut builder = FixedSizeBinaryBuilder::new(15, 4); - // [b"hello", null, "arrow"] builder.append_value(b"hello").unwrap(); } #[test]