Skip to content

Commit

Permalink
refactor: construct StructArray w/ FieldRef
Browse files Browse the repository at this point in the history
`DataType` uses `Fields`/`FieldRef` internally. Accepting `Field` just
to wrap it into an `Arc` is unnecessary expensive, esp. when the `Field`
was cloned from an `FieldRef` (happens in some non-test code).

I've decided to NOT allow the construction from `Field` anymore because
in prod code this is most likely a performance bug.
  • Loading branch information
crepererum committed Apr 26, 2023
1 parent b981921 commit fa3d4f8
Show file tree
Hide file tree
Showing 21 changed files with 136 additions and 122 deletions.
22 changes: 11 additions & 11 deletions arrow-array/src/array/map_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,12 +197,12 @@ impl MapArray {
let entry_offsets_buffer = Buffer::from(entry_offsets.to_byte_slice());
let keys_data = StringArray::from_iter_values(keys);

let keys_field = Field::new("keys", DataType::Utf8, false);
let values_field = Field::new(
let keys_field = Arc::new(Field::new("keys", DataType::Utf8, false));
let values_field = Arc::new(Field::new(
"values",
values.data_type().clone(),
values.null_count() > 0,
);
));

let entry_struct = StructArray::from(vec![
(keys_field, Arc::new(keys_data) as ArrayRef),
Expand Down Expand Up @@ -336,8 +336,8 @@ mod tests {
// [[0, 1, 2], [3, 4, 5], [6, 7]]
let entry_offsets = Buffer::from(&[0, 3, 6, 8].to_byte_slice());

let keys = Field::new("keys", DataType::Int32, false);
let values = Field::new("values", DataType::UInt32, false);
let keys = Arc::new(Field::new("keys", DataType::Int32, false));
let values = Arc::new(Field::new("values", DataType::UInt32, false));
let entry_struct = StructArray::from(vec![
(keys, make_array(keys_data)),
(values, make_array(values_data)),
Expand Down Expand Up @@ -382,8 +382,8 @@ mod tests {
// [[0, 1, 2], [3, 4, 5], [6, 7]]
let entry_offsets = Buffer::from(&[0, 3, 6, 8].to_byte_slice());

let keys_field = Field::new("keys", DataType::Int32, false);
let values_field = Field::new("values", DataType::UInt32, true);
let keys_field = Arc::new(Field::new("keys", DataType::Int32, false));
let values_field = Arc::new(Field::new("values", DataType::UInt32, true));
let entry_struct = StructArray::from(vec![
(keys_field.clone(), make_array(key_data)),
(values_field.clone(), make_array(value_data.clone())),
Expand Down Expand Up @@ -504,8 +504,8 @@ mod tests {
// [[3, 4, 5], [6, 7]]
let entry_offsets = Buffer::from(&[0, 3, 5].to_byte_slice());

let keys = Field::new("keys", DataType::Int32, false);
let values = Field::new("values", DataType::UInt32, false);
let keys = Arc::new(Field::new("keys", DataType::Int32, false));
let values = Arc::new(Field::new("values", DataType::UInt32, false));
let entry_struct = StructArray::from(vec![
(keys, make_array(keys_data)),
(values, make_array(values_data)),
Expand Down Expand Up @@ -582,8 +582,8 @@ mod tests {

let key_array = Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef;
let value_array = Arc::new(UInt32Array::from(vec![0u32, 10, 20])) as ArrayRef;
let keys_field = Field::new("keys", DataType::Utf8, false);
let values_field = Field::new("values", DataType::UInt32, false);
let keys_field = Arc::new(Field::new("keys", DataType::Utf8, false));
let values_field = Arc::new(Field::new("values", DataType::UInt32, false));
let struct_array =
StructArray::from(vec![(keys_field, key_array), (values_field, value_array)]);
assert_eq!(
Expand Down
34 changes: 17 additions & 17 deletions arrow-array/src/array/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
use crate::{make_array, new_null_array, Array, ArrayRef, RecordBatch};
use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{ArrowError, DataType, Field, Fields, SchemaBuilder};
use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields, SchemaBuilder};
use std::sync::Arc;
use std::{any::Any, ops::Index};

Expand Down Expand Up @@ -58,11 +58,11 @@ use std::{any::Any, ops::Index};
///
/// let struct_array = StructArray::from(vec![
/// (
/// Field::new("b", DataType::Boolean, false),
/// Arc::new(Field::new("b", DataType::Boolean, false)),
/// boolean.clone() as ArrayRef,
/// ),
/// (
/// Field::new("c", DataType::Int32, false),
/// Arc::new(Field::new("c", DataType::Int32, false)),
/// int.clone() as ArrayRef,
/// ),
/// ]);
Expand Down Expand Up @@ -379,8 +379,8 @@ impl Array for StructArray {
}
}

impl From<Vec<(Field, ArrayRef)>> for StructArray {
fn from(v: Vec<(Field, ArrayRef)>) -> Self {
impl From<Vec<(FieldRef, ArrayRef)>> for StructArray {
fn from(v: Vec<(FieldRef, ArrayRef)>) -> Self {
let (schema, arrays): (SchemaBuilder, _) = v.into_iter().unzip();
StructArray::new(schema.finish().fields, arrays, None)
}
Expand All @@ -405,8 +405,8 @@ impl std::fmt::Debug for StructArray {
}
}

impl From<(Vec<(Field, ArrayRef)>, Buffer)> for StructArray {
fn from(pair: (Vec<(Field, ArrayRef)>, Buffer)) -> Self {
impl From<(Vec<(FieldRef, ArrayRef)>, Buffer)> for StructArray {
fn from(pair: (Vec<(FieldRef, ArrayRef)>, Buffer)) -> Self {
let len = pair.0.first().map(|x| x.1.len()).unwrap_or_default();
let (fields, arrays): (SchemaBuilder, Vec<_>) = pair.0.into_iter().unzip();
let nulls = NullBuffer::new(BooleanBuffer::new(pair.1, 0, len));
Expand Down Expand Up @@ -480,11 +480,11 @@ mod tests {

let struct_array = StructArray::from(vec![
(
Field::new("b", DataType::Boolean, false),
Arc::new(Field::new("b", DataType::Boolean, false)),
boolean.clone() as ArrayRef,
),
(
Field::new("c", DataType::Int32, false),
Arc::new(Field::new("c", DataType::Int32, false)),
int.clone() as ArrayRef,
),
]);
Expand All @@ -503,11 +503,11 @@ mod tests {

let struct_array = StructArray::from(vec![
(
Field::new("b", DataType::Boolean, false),
Arc::new(Field::new("b", DataType::Boolean, false)),
boolean.clone() as ArrayRef,
),
(
Field::new("c", DataType::Int32, false),
Arc::new(Field::new("c", DataType::Int32, false)),
int.clone() as ArrayRef,
),
]);
Expand Down Expand Up @@ -582,7 +582,7 @@ mod tests {
)]
fn test_struct_array_from_mismatched_types_single() {
drop(StructArray::from(vec![(
Field::new("b", DataType::Int16, false),
Arc::new(Field::new("b", DataType::Int16, false)),
Arc::new(BooleanArray::from(vec![false, false, true, true]))
as Arc<dyn Array>,
)]));
Expand All @@ -595,12 +595,12 @@ mod tests {
fn test_struct_array_from_mismatched_types_multiple() {
drop(StructArray::from(vec![
(
Field::new("b", DataType::Int16, false),
Arc::new(Field::new("b", DataType::Int16, false)),
Arc::new(BooleanArray::from(vec![false, false, true, true]))
as Arc<dyn Array>,
),
(
Field::new("c", DataType::Utf8, false),
Arc::new(Field::new("c", DataType::Utf8, false)),
Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
),
]));
Expand Down Expand Up @@ -700,11 +700,11 @@ mod tests {
fn test_invalid_struct_child_array_lengths() {
drop(StructArray::from(vec![
(
Field::new("b", DataType::Float32, false),
Arc::new(Field::new("b", DataType::Float32, false)),
Arc::new(Float32Array::from(vec![1.1])) as Arc<dyn Array>,
),
(
Field::new("c", DataType::Float64, false),
Arc::new(Field::new("c", DataType::Float64, false)),
Arc::new(Float64Array::from(vec![2.2, 3.3])),
),
]));
Expand All @@ -722,7 +722,7 @@ mod tests {
)]
fn test_struct_array_from_mismatched_nullability() {
drop(StructArray::from(vec![(
Field::new("c", DataType::Int32, false),
Arc::new(Field::new("c", DataType::Int32, false)),
Arc::new(Int32Array::from(vec![Some(42), None, Some(19)])) as ArrayRef,
)]));
}
Expand Down
8 changes: 4 additions & 4 deletions arrow-array/src/builder/map_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,16 +181,16 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
keys_arr.null_count()
);

let keys_field = Field::new(
let keys_field = Arc::new(Field::new(
self.field_names.key.as_str(),
keys_arr.data_type().clone(),
false, // always non-nullable
);
let values_field = Field::new(
));
let values_field = Arc::new(Field::new(
self.field_names.value.as_str(),
values_arr.data_type().clone(),
true,
);
));

let struct_array =
StructArray::from(vec![(keys_field, keys_arr), (values_field, values_arr)]);
Expand Down
6 changes: 3 additions & 3 deletions arrow-array/src/builder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,14 +115,14 @@
//! /// Note: returns StructArray to allow nesting within another array if desired
//! fn finish(&mut self) -> StructArray {
//! let i32 = Arc::new(self.i32.finish()) as ArrayRef;
//! let i32_field = Field::new("i32", DataType::Int32, false);
//! let i32_field = Arc::new(Field::new("i32", DataType::Int32, false));
//!
//! let string = Arc::new(self.string.finish()) as ArrayRef;
//! let string_field = Field::new("i32", DataType::Utf8, false);
//! let string_field = Arc::new(Field::new("i32", DataType::Utf8, false));
//!
//! let i32_list = Arc::new(self.i32_list.finish()) as ArrayRef;
//! let value_field = Arc::new(Field::new("item", DataType::Int32, true));
//! let i32_list_field = Field::new("i32_list", DataType::List(value_field), true);
//! let i32_list_field = Arc::new(Field::new("i32_list", DataType::List(value_field), true));
//!
//! StructArray::from(vec![
//! (i32_field, i32),
Expand Down
4 changes: 2 additions & 2 deletions arrow-array/src/record_batch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -788,11 +788,11 @@ mod tests {
let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
let struct_array = StructArray::from(vec![
(
Field::new("b", DataType::Boolean, false),
Arc::new(Field::new("b", DataType::Boolean, false)),
boolean.clone() as ArrayRef,
),
(
Field::new("c", DataType::Int32, false),
Arc::new(Field::new("c", DataType::Int32, false)),
int.clone() as ArrayRef,
),
]);
Expand Down
8 changes: 4 additions & 4 deletions arrow-cast/src/pretty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -649,17 +649,17 @@ mod tests {

let c1 = StructArray::from(vec![
(
Field::new("c11", DataType::Int32, true),
Arc::new(Field::new("c11", DataType::Int32, true)),
Arc::new(Int32Array::from(vec![Some(1), None, Some(5)])) as ArrayRef,
),
(
Field::new_struct(
Arc::new(Field::new_struct(
"c12",
vec![Field::new("c121", DataType::Utf8, false)],
false,
),
)),
Arc::new(StructArray::from(vec![(
Field::new("c121", DataType::Utf8, false),
Arc::new(Field::new("c121", DataType::Utf8, false)),
Arc::new(StringArray::from(vec![Some("e"), Some("f"), Some("g")]))
as ArrayRef,
)])) as ArrayRef,
Expand Down
19 changes: 11 additions & 8 deletions arrow-ipc/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ fn create_array(
)?;
node_index = triple.1;
buffer_index = triple.2;
struct_arrays.push((struct_field.as_ref().clone(), triple.0));
struct_arrays.push((struct_field.clone(), triple.0));
}
let null_count = struct_node.null_count() as usize;
let struct_array = if null_count > 0 {
Expand Down Expand Up @@ -1593,7 +1593,7 @@ mod tests {

let array = Arc::new(inner) as ArrayRef;

let dctfield = Field::new("dict", array.data_type().clone(), false);
let dctfield = Arc::new(Field::new("dict", array.data_type().clone(), false));

let s = StructArray::from(vec![(dctfield, array)]);
let struct_array = Arc::new(s) as ArrayRef;
Expand Down Expand Up @@ -1695,9 +1695,12 @@ mod tests {
);
let string_array: ArrayRef = Arc::new(StringArray::from(xs.clone()));
let struct_array = StructArray::from(vec![
(Field::new("f2.1", DataType::Utf8, false), string_array),
(
Field::new("f2.2_struct", dict.data_type().clone(), false),
Arc::new(Field::new("f2.1", DataType::Utf8, false)),
string_array,
),
(
Arc::new(Field::new("f2.2_struct", dict.data_type().clone(), false)),
dict.clone() as ArrayRef,
),
]);
Expand Down Expand Up @@ -1727,20 +1730,20 @@ mod tests {
let key_dict_keys = Int8Array::from_iter_values([0, 0, 2, 1, 1, 3]);
let key_dict_array = DictionaryArray::new(key_dict_keys, values);

let keys_field = Field::new_dict(
let keys_field = Arc::new(Field::new_dict(
"keys",
DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
true,
1,
false,
);
let values_field = Field::new_dict(
));
let values_field = Arc::new(Field::new_dict(
"values",
DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
true,
1,
false,
);
));
let entry_struct = StructArray::from(vec![
(keys_field, make_array(key_dict_array.into_data())),
(values_field, make_array(value_dict_array.into_data())),
Expand Down
13 changes: 9 additions & 4 deletions arrow-ipc/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1692,8 +1692,13 @@ mod tests {
let array = Arc::new(inner) as ArrayRef;

// Dict field with id 2
let dctfield =
Field::new_dict("dict", array.data_type().clone(), false, 2, false);
let dctfield = Arc::new(Field::new_dict(
"dict",
array.data_type().clone(),
false,
2,
false,
));

let s = StructArray::from(vec![(dctfield, array)]);
let struct_array = Arc::new(s) as ArrayRef;
Expand Down Expand Up @@ -1896,11 +1901,11 @@ mod tests {

let struct_array = StructArray::from(vec![
(
Field::new("s", DataType::Utf8, true),
Arc::new(Field::new("s", DataType::Utf8, true)),
Arc::new(strings) as ArrayRef,
),
(
Field::new("c", DataType::Int32, true),
Arc::new(Field::new("c", DataType::Int32, true)),
Arc::new(ints) as ArrayRef,
),
]);
Expand Down
Loading

0 comments on commit fa3d4f8

Please sign in to comment.