Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions arrow-array/benches/union_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ fn criterion_benchmark(c: &mut Criterion) {
|b| {
let type_ids = 0..with_nulls+without_nulls;

let fields = UnionFields::new(
let fields = UnionFields::try_new(
type_ids.clone(),
type_ids.clone().map(|i| Field::new(format!("f{i}"), DataType::Int32, true)),
);
).unwrap();

let array = UnionArray::try_new(
fields,
Expand Down
5 changes: 3 additions & 2 deletions arrow-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1067,13 +1067,14 @@ mod tests {
fn test_null_union() {
for mode in [UnionMode::Sparse, UnionMode::Dense] {
let data_type = DataType::Union(
UnionFields::new(
UnionFields::try_new(
vec![2, 1],
vec![
Field::new("foo", DataType::Int32, true),
Field::new("bar", DataType::Int64, true),
],
),
)
.unwrap(),
mode,
);
let array = new_null_array(&data_type, 4);
Expand Down
25 changes: 15 additions & 10 deletions arrow-array/src/array/union_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1674,14 +1674,15 @@ mod tests {
#[test]
fn test_custom_type_ids() {
let data_type = DataType::Union(
UnionFields::new(
UnionFields::try_new(
vec![8, 4, 9],
vec![
Field::new("strings", DataType::Utf8, false),
Field::new("integers", DataType::Int32, false),
Field::new("floats", DataType::Float64, false),
],
),
)
.unwrap(),
UnionMode::Dense,
);

Expand Down Expand Up @@ -1788,14 +1789,15 @@ mod tests {
fn into_parts_custom_type_ids() {
let set_field_type_ids: [i8; 3] = [8, 4, 9];
let data_type = DataType::Union(
UnionFields::new(
UnionFields::try_new(
set_field_type_ids,
[
Field::new("strings", DataType::Utf8, false),
Field::new("integers", DataType::Int32, false),
Field::new("floats", DataType::Float64, false),
],
),
)
.unwrap(),
UnionMode::Dense,
);
let string_array = StringArray::from(vec!["foo", "bar", "baz"]);
Expand Down Expand Up @@ -1828,13 +1830,14 @@ mod tests {

#[test]
fn test_invalid() {
let fields = UnionFields::new(
let fields = UnionFields::try_new(
[3, 2],
[
Field::new("a", DataType::Utf8, false),
Field::new("b", DataType::Utf8, false),
],
);
)
.unwrap();
let children = vec![
Arc::new(StringArray::from_iter_values(["a", "b"])) as _,
Arc::new(StringArray::from_iter_values(["c", "d"])) as _,
Expand Down Expand Up @@ -1904,13 +1907,14 @@ mod tests {

assert_eq!(array.logical_nulls(), None);

let fields = UnionFields::new(
let fields = UnionFields::try_new(
[1, 3],
[
Field::new("a", DataType::Int8, false), // non nullable
Field::new("b", DataType::Int8, false), // non nullable
],
);
)
.unwrap();
let array = UnionArray::try_new(
fields,
vec![1].into(),
Expand All @@ -1924,13 +1928,14 @@ mod tests {

assert_eq!(array.logical_nulls(), None);

let nullable_fields = UnionFields::new(
let nullable_fields = UnionFields::try_new(
[1, 3],
[
Field::new("a", DataType::Int8, true), // nullable but without nulls
Field::new("b", DataType::Int8, true), // nullable but without nulls
],
);
)
.unwrap();
let array = UnionArray::try_new(
nullable_fields.clone(),
vec![1, 1].into(),
Expand Down
5 changes: 3 additions & 2 deletions arrow-avro/benches/avro_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -688,14 +688,15 @@ static ENUM_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {

static UNION_DATA: Lazy<Vec<RecordBatch>> = Lazy::new(|| {
// Basic Dense Union of three types: Utf8, Int32, Float64
let union_fields = UnionFields::new(
let union_fields = UnionFields::try_new(
vec![0, 1, 2],
vec![
Field::new("u_str", DataType::Utf8, true),
Field::new("u_int", DataType::Int32, true),
Field::new("u_f64", DataType::Float64, true),
],
);
)
.expect("UnionFields should be valid");
let union_dt = DataType::Union(union_fields.clone(), UnionMode::Dense);
let schema = schema_single("field1", union_dt);

Expand Down
8 changes: 4 additions & 4 deletions arrow-avro/src/codec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -993,13 +993,13 @@ fn union_branch_name(dt: &AvroDataType) -> String {
dt.codec.union_field_name()
}

fn build_union_fields(encodings: &[AvroDataType]) -> UnionFields {
fn build_union_fields(encodings: &[AvroDataType]) -> Result<UnionFields, ArrowError> {
let arrow_fields: Vec<Field> = encodings
.iter()
.map(|encoding| encoding.field_with_name(&union_branch_name(encoding)))
.collect();
let type_ids: Vec<i8> = (0..arrow_fields.len()).map(|i| i as i8).collect();
UnionFields::new(type_ids, arrow_fields)
UnionFields::try_new(type_ids, arrow_fields)
}

/// Resolves Avro type names to [`AvroDataType`]
Expand Down Expand Up @@ -1267,7 +1267,7 @@ impl<'a> Maker<'a> {
.map(|s| self.parse_type(s, namespace))
.collect::<Result<_, _>>()?;
// Build Arrow layout once here
let union_fields = build_union_fields(&children);
let union_fields = build_union_fields(&children)?;
Ok(AvroDataType::new(
Codec::Union(Arc::from(children), union_fields, UnionMode::Dense),
Default::default(),
Expand Down Expand Up @@ -1620,7 +1620,7 @@ impl<'a> Maker<'a> {
for writer in writer_variants {
writer_to_reader.push(self.find_best_promotion(writer, reader_variants, namespace));
}
let union_fields = build_union_fields(&reader_encodings);
let union_fields = build_union_fields(&reader_encodings)?;
let mut dt = AvroDataType::new(
Codec::Union(reader_encodings.into(), union_fields, UnionMode::Dense),
Default::default(),
Expand Down
45 changes: 27 additions & 18 deletions arrow-avro/src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7839,35 +7839,38 @@ mod test {
let uuid1 = uuid16_from_str("fe7bc30b-4ce8-4c5e-b67c-2234a2d38e66");
let uuid2 = uuid16_from_str("0826cc06-d2e3-4599-b4ad-af5fa6905cdb");
let item_name = Field::LIST_FIELD_DEFAULT_NAME;
let uf_tri = UnionFields::new(
let uf_tri = UnionFields::try_new(
vec![0, 1, 2],
vec![
Field::new("int", DataType::Int32, false),
Field::new("string", DataType::Utf8, false),
Field::new("boolean", DataType::Boolean, false),
],
);
let uf_arr_items = UnionFields::new(
)
.unwrap();
let uf_arr_items = UnionFields::try_new(
vec![0, 1, 2],
vec![
Field::new("null", DataType::Null, false),
Field::new("string", DataType::Utf8, false),
Field::new("long", DataType::Int64, false),
],
);
)
.unwrap();
let arr_items_field = Arc::new(Field::new(
item_name,
DataType::Union(uf_arr_items.clone(), UnionMode::Dense),
true,
));
let uf_map_vals = UnionFields::new(
let uf_map_vals = UnionFields::try_new(
vec![0, 1, 2],
vec![
Field::new("string", DataType::Utf8, false),
Field::new("double", DataType::Float64, false),
Field::new("null", DataType::Null, false),
],
);
)
.unwrap();
let map_entries_field = Arc::new(Field::new(
"entries",
DataType::Struct(Fields::from(vec![
Expand Down Expand Up @@ -7928,7 +7931,7 @@ mod test {
);
m
};
let uf_union_big = UnionFields::new(
let uf_union_big = UnionFields::try_new(
vec![0, 1, 2, 3, 4],
vec![
Field::new(
Expand Down Expand Up @@ -7960,7 +7963,8 @@ mod test {
)
.with_metadata(enum_md_color.clone()),
],
);
)
.unwrap();
let fx4_md = {
let mut m = HashMap::<String, String>::new();
m.insert(AVRO_NAME_METADATA_KEY.to_string(), "Fx4".to_string());
Expand All @@ -7970,7 +7974,7 @@ mod test {
);
m
};
let uf_date_fixed4 = UnionFields::new(
let uf_date_fixed4 = UnionFields::try_new(
vec![0, 1],
vec![
Field::new(
Expand All @@ -7981,7 +7985,8 @@ mod test {
.with_metadata(fx4_md.clone()),
Field::new("date", DataType::Date32, false),
],
);
)
.unwrap();
let dur12u_md = {
let mut m = HashMap::<String, String>::new();
m.insert(AVRO_NAME_METADATA_KEY.to_string(), "Dur12U".to_string());
Expand All @@ -7991,7 +7996,7 @@ mod test {
);
m
};
let uf_dur_or_str = UnionFields::new(
let uf_dur_or_str = UnionFields::try_new(
vec![0, 1],
vec![
Field::new("string", DataType::Utf8, false),
Expand All @@ -8002,7 +8007,8 @@ mod test {
)
.with_metadata(dur12u_md.clone()),
],
);
)
.unwrap();
let fx10_md = {
let mut m = HashMap::<String, String>::new();
m.insert(AVRO_NAME_METADATA_KEY.to_string(), "Fx10".to_string());
Expand All @@ -8012,7 +8018,7 @@ mod test {
);
m
};
let uf_uuid_or_fx10 = UnionFields::new(
let uf_uuid_or_fx10 = UnionFields::try_new(
vec![0, 1],
vec![
Field::new(
Expand All @@ -8023,15 +8029,17 @@ mod test {
.with_metadata(fx10_md.clone()),
add_uuid_ext_union(Field::new("uuid", DataType::FixedSizeBinary(16), false)),
],
);
let uf_kv_val = UnionFields::new(
)
.unwrap();
let uf_kv_val = UnionFields::try_new(
vec![0, 1, 2],
vec![
Field::new("null", DataType::Null, false),
Field::new("int", DataType::Int32, false),
Field::new("long", DataType::Int64, false),
],
);
)
.unwrap();
let kv_fields = Fields::from(vec![
Field::new("key", DataType::Utf8, false),
Field::new(
Expand All @@ -8053,7 +8061,7 @@ mod test {
])),
false,
));
let uf_map_or_array = UnionFields::new(
let uf_map_or_array = UnionFields::try_new(
vec![0, 1],
vec![
Field::new(
Expand All @@ -8063,7 +8071,8 @@ mod test {
),
Field::new("map", DataType::Map(map_int_entries.clone(), false), false),
],
);
)
.unwrap();
let mut enum_md_status = {
let mut m = HashMap::<String, String>::new();
m.insert(
Expand Down
7 changes: 4 additions & 3 deletions arrow-avro/src/reader/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3674,7 +3674,7 @@ mod tests {
avro_children.push(AvroDataType::new(codec, Default::default(), None));
fields.push(arrow_schema::Field::new(name, dt, true));
}
let union_fields = UnionFields::new(type_ids, fields);
let union_fields = UnionFields::try_new(type_ids, fields).unwrap();
let union_codec = Codec::Union(avro_children.into(), union_fields, UnionMode::Dense);
AvroDataType::new(union_codec, Default::default(), None)
}
Expand Down Expand Up @@ -3823,13 +3823,14 @@ mod tests {
AvroDataType::new(Codec::Int32, Default::default(), None),
AvroDataType::new(Codec::Utf8, Default::default(), None),
];
let uf = UnionFields::new(
let uf = UnionFields::try_new(
vec![1, 3],
vec![
arrow_schema::Field::new("i", DataType::Int32, true),
arrow_schema::Field::new("s", DataType::Utf8, true),
],
);
)
.unwrap();
let codec = Codec::Union(children.into(), uf, UnionMode::Sparse);
let dt = AvroDataType::new(codec, Default::default(), None);
let err = Decoder::try_new(&dt).expect_err("sparse union should not be supported");
Expand Down
10 changes: 6 additions & 4 deletions arrow-avro/src/writer/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2527,13 +2527,14 @@ mod tests {
let strings = StringArray::from(vec!["hello", "world"]);
let ints = Int32Array::from(vec![10, 20, 30]);

let union_fields = UnionFields::new(
let union_fields = UnionFields::try_new(
vec![0, 1],
vec![
Field::new("v_str", DataType::Utf8, true),
Field::new("v_int", DataType::Int32, true),
],
);
)
.unwrap();

let type_ids = Buffer::from_slice_ref([0_i8, 1, 1, 0, 1]);
let offsets = Buffer::from_slice_ref([0_i32, 0, 1, 1, 2]);
Expand Down Expand Up @@ -2584,14 +2585,15 @@ mod tests {
let strings = StringArray::from(vec!["hello"]);
let ints = Int32Array::from(vec![10]);

let union_fields = UnionFields::new(
let union_fields = UnionFields::try_new(
vec![0, 1, 2],
vec![
Field::new("v_null", DataType::Null, true),
Field::new("v_str", DataType::Utf8, true),
Field::new("v_int", DataType::Int32, true),
],
);
)
.unwrap();

let type_ids = Buffer::from_slice_ref([0_i8, 1, 2]);
// For a null value in a dense union, no value is added to a child array.
Expand Down
5 changes: 3 additions & 2 deletions arrow-cast/src/pretty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1610,10 +1610,11 @@ mod tests {
extension::EXTENSION_TYPE_NAME_KEY.to_owned(),
"my_money".to_owned(),
)]);
let fields = UnionFields::new(
let fields = UnionFields::try_new(
vec![0],
vec![Field::new("income", DataType::Int32, true).with_metadata(money_metadata.clone())],
);
)
.unwrap();

// Create nested data and construct it with the correct metadata
let mut array_builder = UnionBuilder::new_dense();
Expand Down
Loading
Loading