Skip to content

Commit

Permalink
Rewrite ArrayDataBuilder::null_bit_buffer (#1739)
Browse files Browse the repository at this point in the history
* clean up

Signed-off-by: remzi <13716567376yh@gmail.com>

* fix a nit

Signed-off-by: remzi <13716567376yh@gmail.com>

* use boolean::then

Signed-off-by: remzi <13716567376yh@gmail.com>

* fix merge conflict

Signed-off-by: remzi <13716567376yh@gmail.com>
  • Loading branch information
HaoYang670 authored May 27, 2022
1 parent b265780 commit 9722f06
Show file tree
Hide file tree
Showing 34 changed files with 212 additions and 252 deletions.
2 changes: 1 addition & 1 deletion arrow/examples/builders.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ fn main() {
.len(3)
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_buffer(Buffer::from(&values[..]))
.null_bit_buffer(Buffer::from([0b00000101]))
.null_bit_buffer(Some(Buffer::from([0b00000101])))
.build()
.unwrap();
let binary_array = StringArray::from(array_data);
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/array/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -877,7 +877,7 @@ mod tests {
let empty_with_bitmap = PrimitiveArray::<Int64Type>::from(
ArrayData::builder(arr.data_type().clone())
.add_buffer(MutableBuffer::new(0).into())
.null_bit_buffer(MutableBuffer::new_null(0).into())
.null_bit_buffer(Some(MutableBuffer::new_null(0).into()))
.build()
.unwrap(),
);
Expand Down
26 changes: 10 additions & 16 deletions arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,13 +152,11 @@ impl<OffsetSize: OffsetSizeTrait> GenericBinaryArray<OffsetSize> {
"BinaryArray can only be created from List<u8> arrays, mismatched data types."
);

let mut builder = ArrayData::builder(Self::get_data_type())
let builder = ArrayData::builder(Self::get_data_type())
.len(v.len())
.add_buffer(v.data_ref().buffers()[0].clone())
.add_buffer(v.data_ref().child_data()[0].buffers()[0].clone());
if let Some(bitmap) = v.data_ref().null_bitmap() {
builder = builder.null_bit_buffer(bitmap.bits.clone())
}
.add_buffer(v.data_ref().child_data()[0].buffers()[0].clone())
.null_bit_buffer(v.data_ref().null_buffer().cloned());

let data = unsafe { builder.build_unchecked() };
Self::from(data)
Expand Down Expand Up @@ -308,7 +306,7 @@ where
.len(data_len)
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
.null_bit_buffer(null_buf.into());
.null_bit_buffer(Some(null_buf.into()));
let array_data = unsafe { array_data.build_unchecked() };
Self::from(array_data)
}
Expand Down Expand Up @@ -692,12 +690,10 @@ impl From<FixedSizeListArray> for FixedSizeBinaryArray {
"FixedSizeBinaryArray can only be created from FixedSizeList<u8> arrays, mismatched data types."
);

let mut builder = ArrayData::builder(DataType::FixedSizeBinary(v.value_length()))
let builder = ArrayData::builder(DataType::FixedSizeBinary(v.value_length()))
.len(v.len())
.add_buffer(v.data_ref().child_data()[0].buffers()[0].clone());
if let Some(bitmap) = v.data_ref().null_bitmap() {
builder = builder.null_bit_buffer(bitmap.bits.clone())
}
.add_buffer(v.data_ref().child_data()[0].buffers()[0].clone())
.null_bit_buffer(v.data_ref().null_buffer().cloned());

let data = unsafe { builder.build_unchecked() };
Self::from(data)
Expand Down Expand Up @@ -849,12 +845,10 @@ impl DecimalArray {
"DecimalArray can only be created from FixedSizeList<u8> arrays, mismatched data types."
);

let mut builder = ArrayData::builder(DataType::Decimal(precision, scale))
let builder = ArrayData::builder(DataType::Decimal(precision, scale))
.len(v.len())
.add_buffer(v.data_ref().child_data()[0].buffers()[0].clone());
if let Some(bitmap) = v.data_ref().null_bitmap() {
builder = builder.null_bit_buffer(bitmap.bits.clone())
}
.add_buffer(v.data_ref().child_data()[0].buffers()[0].clone())
.null_bit_buffer(v.data_ref().null_buffer().cloned());

let array_data = unsafe { builder.build_unchecked() };
Self::from(array_data)
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/array/array_dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ impl<'a, K: ArrowPrimitiveType> DictionaryArray<K> {
match keys.data().null_buffer() {
Some(buffer) if keys.data().null_count() > 0 => {
data = data
.null_bit_buffer(buffer.clone())
.null_bit_buffer(Some(buffer.clone()))
.null_count(keys.data().null_count());
}
_ => data = data.null_count(0),
Expand Down
12 changes: 6 additions & 6 deletions arrow/src/array/array_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
.len(null_buf.len())
.add_buffer(offsets.into())
.add_child_data(values.data().clone())
.null_bit_buffer(null_buf.into());
.null_bit_buffer(Some(null_buf.into()));
let array_data = unsafe { array_data.build_unchecked() };

Self::from(array_data)
Expand Down Expand Up @@ -836,7 +836,7 @@ mod tests {
.len(9)
.add_buffer(value_offsets)
.add_child_data(value_data.clone())
.null_bit_buffer(Buffer::from(null_bits))
.null_bit_buffer(Some(Buffer::from(null_bits)))
.build()
.unwrap();
let list_array = ListArray::from(list_data);
Expand Down Expand Up @@ -900,7 +900,7 @@ mod tests {
.len(9)
.add_buffer(value_offsets)
.add_child_data(value_data.clone())
.null_bit_buffer(Buffer::from(null_bits))
.null_bit_buffer(Some(Buffer::from(null_bits)))
.build()
.unwrap();
let list_array = LargeListArray::from(list_data);
Expand Down Expand Up @@ -967,7 +967,7 @@ mod tests {
.len(9)
.add_buffer(value_offsets)
.add_child_data(value_data)
.null_bit_buffer(Buffer::from(null_bits))
.null_bit_buffer(Some(Buffer::from(null_bits)))
.build()
.unwrap();
let list_array = LargeListArray::from(list_data);
Expand Down Expand Up @@ -1001,7 +1001,7 @@ mod tests {
let list_data = ArrayData::builder(list_data_type)
.len(5)
.add_child_data(value_data.clone())
.null_bit_buffer(Buffer::from(null_bits))
.null_bit_buffer(Some(Buffer::from(null_bits)))
.build()
.unwrap();
let list_array = FixedSizeListArray::from(list_data);
Expand Down Expand Up @@ -1063,7 +1063,7 @@ mod tests {
let list_data = ArrayData::builder(list_data_type)
.len(5)
.add_child_data(value_data)
.null_bit_buffer(Buffer::from(null_bits))
.null_bit_buffer(Some(Buffer::from(null_bits)))
.build()
.unwrap();
let list_array = FixedSizeListArray::from(list_data);
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/array/array_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ mod tests {
.add_buffer(Buffer::from(
&[0u32, 10, 20, 0, 40, 0, 60, 70].to_byte_slice(),
))
.null_bit_buffer(Buffer::from(&[0b11010110]))
.null_bit_buffer(Some(Buffer::from(&[0b11010110])))
.build()
.unwrap();

Expand Down
2 changes: 1 addition & 1 deletion arrow/src/array/array_primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,7 @@ impl<T: ArrowTimestampType> PrimitiveArray<T> {
ArrayData::builder(DataType::Timestamp(T::get_time_unit(), timezone))
.len(data_len)
.add_buffer(val_buf.into())
.null_bit_buffer(null_buf.into());
.null_bit_buffer(Some(null_buf.into()));
let array_data = unsafe { array_data.build_unchecked() };
PrimitiveArray::from(array_data)
}
Expand Down
10 changes: 4 additions & 6 deletions arrow/src/array/array_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,11 @@ impl<OffsetSize: OffsetSizeTrait> GenericStringArray<OffsetSize> {
"StringArray can only be created from List<u8> arrays, mismatched data types."
);

let mut builder = ArrayData::builder(Self::get_data_type())
let builder = ArrayData::builder(Self::get_data_type())
.len(v.len())
.add_buffer(v.data().buffers()[0].clone())
.add_buffer(v.data().child_data()[0].buffers()[0].clone());
if let Some(bitmap) = v.data().null_bitmap() {
builder = builder.null_bit_buffer(bitmap.bits.clone())
}
.add_buffer(v.data().child_data()[0].buffers()[0].clone())
.null_bit_buffer(v.data().null_buffer().cloned());

let array_data = unsafe { builder.build_unchecked() };
Self::from(array_data)
Expand Down Expand Up @@ -252,7 +250,7 @@ where
.len(data_len)
.add_buffer(offsets.into())
.add_buffer(values.into())
.null_bit_buffer(null_buf.into());
.null_bit_buffer(Some(null_buf.into()));
let array_data = unsafe { array_data.build_unchecked() };
Self::from(array_data)
}
Expand Down
18 changes: 8 additions & 10 deletions arrow/src/array/array_struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,12 +176,10 @@ impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
}
let len = len.unwrap();

let mut builder = ArrayData::builder(DataType::Struct(fields))
let builder = ArrayData::builder(DataType::Struct(fields))
.len(len)
.null_bit_buffer(null)
.child_data(child_data);
if let Some(null_buffer) = null {
builder = builder.null_bit_buffer(null_buffer);
}

let array_data = unsafe { builder.build_unchecked() };

Expand Down Expand Up @@ -270,7 +268,7 @@ impl From<(Vec<(Field, ArrayRef)>, Buffer)> for StructArray {
}

let array_data = ArrayData::builder(DataType::Struct(field_types))
.null_bit_buffer(pair.1)
.null_bit_buffer(Some(pair.1))
.child_data(field_values.into_iter().map(|a| a.data().clone()).collect())
.len(length);
let array_data = unsafe { array_data.build_unchecked() };
Expand Down Expand Up @@ -366,15 +364,15 @@ mod tests {

let expected_string_data = ArrayData::builder(DataType::Utf8)
.len(4)
.null_bit_buffer(Buffer::from(&[9_u8]))
.null_bit_buffer(Some(Buffer::from(&[9_u8])))
.add_buffer(Buffer::from(&[0, 3, 3, 3, 7].to_byte_slice()))
.add_buffer(Buffer::from(b"joemark"))
.build()
.unwrap();

let expected_int_data = ArrayData::builder(DataType::Int32)
.len(4)
.null_bit_buffer(Buffer::from(&[11_u8]))
.null_bit_buffer(Some(Buffer::from(&[11_u8])))
.add_buffer(Buffer::from(&[1, 2, 0, 4].to_byte_slice()))
.build()
.unwrap();
Expand Down Expand Up @@ -428,13 +426,13 @@ mod tests {
let boolean_data = ArrayData::builder(DataType::Boolean)
.len(5)
.add_buffer(Buffer::from([0b00010000]))
.null_bit_buffer(Buffer::from([0b00010001]))
.null_bit_buffer(Some(Buffer::from([0b00010001])))
.build()
.unwrap();
let int_data = ArrayData::builder(DataType::Int32)
.len(5)
.add_buffer(Buffer::from([0, 28, 42, 0, 0].to_byte_slice()))
.null_bit_buffer(Buffer::from([0b00000110]))
.null_bit_buffer(Some(Buffer::from([0b00000110])))
.build()
.unwrap();

Expand All @@ -446,7 +444,7 @@ mod tests {
.len(5)
.add_child_data(boolean_data.clone())
.add_child_data(int_data.clone())
.null_bit_buffer(Buffer::from([0b00010111]))
.null_bit_buffer(Some(Buffer::from([0b00010111])))
.build()
.unwrap();
let struct_array = StructArray::from(struct_array_data);
Expand Down
42 changes: 22 additions & 20 deletions arrow/src/array/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -630,12 +630,11 @@ impl BooleanBuilder {
let len = self.len();
let null_bit_buffer = self.bitmap_builder.finish();
let null_count = len - null_bit_buffer.count_set_bits();
let mut builder = ArrayData::builder(DataType::Boolean)
let builder = ArrayData::builder(DataType::Boolean)
.len(len)
.add_buffer(self.values_builder.finish());
if null_count > 0 {
builder = builder.null_bit_buffer(null_bit_buffer);
}
.add_buffer(self.values_builder.finish())
.null_bit_buffer((null_count > 0).then(|| null_bit_buffer));

let array_data = unsafe { builder.build_unchecked() };
BooleanArray::from(array_data)
}
Expand Down Expand Up @@ -829,12 +828,15 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
.as_ref()
.map(|b| b.count_set_bits())
.unwrap_or(len);
let mut builder = ArrayData::builder(T::DATA_TYPE)
let builder = ArrayData::builder(T::DATA_TYPE)
.len(len)
.add_buffer(self.values_builder.finish());
if null_count > 0 {
builder = builder.null_bit_buffer(null_bit_buffer.unwrap());
}
.add_buffer(self.values_builder.finish())
.null_bit_buffer(if null_count > 0 {
null_bit_buffer
} else {
None
});

let array_data = unsafe { builder.build_unchecked() };
PrimitiveArray::<T>::from(array_data)
}
Expand All @@ -856,7 +858,7 @@ impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> {
.len(len)
.add_buffer(self.values_builder.finish());
if null_count > 0 {
builder = builder.null_bit_buffer(null_bit_buffer.unwrap());
builder = builder.null_bit_buffer(null_bit_buffer);
}
builder = builder.add_child_data(values.data().clone());
let array_data = unsafe { builder.build_unchecked() };
Expand Down Expand Up @@ -992,7 +994,7 @@ where
.len(len)
.add_buffer(offset_buffer)
.add_child_data(values_data.clone())
.null_bit_buffer(null_bit_buffer);
.null_bit_buffer(Some(null_bit_buffer));

let array_data = unsafe { array_data.build_unchecked() };

Expand Down Expand Up @@ -1123,7 +1125,7 @@ where
))
.len(len)
.add_child_data(values_data.clone())
.null_bit_buffer(null_bit_buffer);
.null_bit_buffer(Some(null_bit_buffer));

let array_data = unsafe { array_data.build_unchecked() };

Expand Down Expand Up @@ -1710,7 +1712,7 @@ impl StructBuilder {
.len(self.len)
.child_data(child_data);
if null_count > 0 {
builder = builder.null_bit_buffer(null_bit_buffer);
builder = builder.null_bit_buffer(Some(null_bit_buffer));
}

self.len = 0;
Expand Down Expand Up @@ -1845,7 +1847,7 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
.len(len)
.add_buffer(offset_buffer)
.add_child_data(struct_array.data().clone())
.null_bit_buffer(null_bit_buffer);
.null_bit_buffer(Some(null_bit_buffer));

let array_data = unsafe { array_data.build_unchecked() };

Expand Down Expand Up @@ -2155,7 +2157,7 @@ impl UnionBuilder {
let arr_data_builder = ArrayDataBuilder::new(data_type.clone())
.add_buffer(buffer)
.len(slots)
.null_bit_buffer(bitmap_builder.finish());
.null_bit_buffer(Some(bitmap_builder.finish()));

let arr_data_ref = unsafe { arr_data_builder.build_unchecked() };
let array_ref = make_array(arr_data_ref);
Expand Down Expand Up @@ -3534,15 +3536,15 @@ mod tests {

let expected_string_data = ArrayData::builder(DataType::Utf8)
.len(4)
.null_bit_buffer(Buffer::from(&[9_u8]))
.null_bit_buffer(Some(Buffer::from(&[9_u8])))
.add_buffer(Buffer::from_slice_ref(&[0, 3, 3, 3, 7]))
.add_buffer(Buffer::from_slice_ref(b"joemark"))
.build()
.unwrap();

let expected_int_data = ArrayData::builder(DataType::Int32)
.len(4)
.null_bit_buffer(Buffer::from_slice_ref(&[11_u8]))
.null_bit_buffer(Some(Buffer::from_slice_ref(&[11_u8])))
.add_buffer(Buffer::from_slice_ref(&[1, 2, 0, 4]))
.build()
.unwrap();
Expand Down Expand Up @@ -3648,15 +3650,15 @@ mod tests {

let expected_string_data = ArrayData::builder(DataType::Utf8)
.len(4)
.null_bit_buffer(Buffer::from(&[9_u8]))
.null_bit_buffer(Some(Buffer::from(&[9_u8])))
.add_buffer(Buffer::from_slice_ref(&[0, 3, 3, 3, 7]))
.add_buffer(Buffer::from_slice_ref(b"joemark"))
.build()
.unwrap();

let expected_int_data = ArrayData::builder(DataType::Int32)
.len(4)
.null_bit_buffer(Buffer::from_slice_ref(&[11_u8]))
.null_bit_buffer(Some(Buffer::from_slice_ref(&[11_u8])))
.add_buffer(Buffer::from_slice_ref(&[1, 2, 0, 4]))
.build()
.unwrap();
Expand Down
Loading

0 comments on commit 9722f06

Please sign in to comment.