Skip to content

Commit a7572eb

Browse files
authored
[Variant] Implement primitive type access for null/time/decimal* (#8638)
# Which issue does this PR close? - Closes #8637. - Support Variant to Arrow for Null/Time/Decimal{4,8,16} # What changes are included in this PR? - Add logic in `typed_value_to_variant`/`PrimitiveVariantToArrowRowBuilder` for `Null/Time/Decimal{4,8,16}` - Implement `PrimitiveFromVariant` for `Time64MicrosecondType` - Add tests to cover the added logic # Are these changes tested? Added some tests # Are there any user-facing changes? No
1 parent 89846a8 commit a7572eb

File tree

4 files changed

+252
-12
lines changed

4 files changed

+252
-12
lines changed

parquet-variant-compute/src/type_conversion.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ use arrow::datatypes::{
2323
self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type,
2424
DecimalType,
2525
};
26+
use chrono::Timelike;
2627
use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16};
2728

2829
/// Options for controlling the behavior of `cast_to_variant_with_options`.
@@ -89,6 +90,9 @@ impl_primitive_from_variant!(
8990
as_naive_date,
9091
datatypes::Date32Type::from_naive_date
9192
);
93+
// Converts the time value obtained via `as_time_utc` into Arrow `Time64(Microsecond)`,
// i.e. microseconds elapsed since midnight.
//
// Both `num_seconds_from_midnight()` and `nanosecond()` return `u32`, so each term is widened
// to `i64` BEFORE multiplying/adding: `seconds * 1_000_000` exceeds `u32::MAX` for any time
// later than roughly 01:11:34 (4294 s), which would panic in debug builds and silently wrap
// in release builds if the arithmetic were done in `u32` and only cast afterwards.
impl_primitive_from_variant!(datatypes::Time64MicrosecondType, as_time_utc, |v| {
    i64::from(v.num_seconds_from_midnight()) * 1_000_000 + i64::from(v.nanosecond() / 1_000)
});
9296
impl_timestamp_from_variant!(
9397
datatypes::TimestampMicrosecondType,
9498
as_timestamp_ntz_micros,

parquet-variant-compute/src/variant_array.rs

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,13 @@ use arrow::array::{Array, ArrayRef, AsArray, BinaryViewArray, StructArray};
2323
use arrow::buffer::NullBuffer;
2424
use arrow::compute::cast;
2525
use arrow::datatypes::{
26-
Date32Type, Float16Type, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type,
26+
Date32Type, Decimal32Type, Decimal64Type, Decimal128Type, Float16Type, Float32Type,
27+
Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, Time64MicrosecondType,
2728
TimestampMicrosecondType, TimestampNanosecondType,
2829
};
2930
use arrow_schema::extension::ExtensionType;
3031
use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields, TimeUnit};
31-
use chrono::DateTime;
32+
use chrono::{DateTime, NaiveTime};
3233
use parquet_variant::{
3334
Uuid, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16, VariantDecimalType as _,
3435
};
@@ -539,7 +540,7 @@ impl<'a> DoubleEndedIterator for VariantArrayIter<'a> {
539540

540541
impl<'a> ExactSizeIterator for VariantArrayIter<'a> {}
541542

542-
/// One shredded field of a partially or prefectly shredded variant. For example, suppose the
543+
/// One shredded field of a partially or perfectly shredded variant. For example, suppose the
543544
/// shredding schema for variant `v` treats it as an object with a single field `a`, where `a` is
544545
/// itself a struct with the single field `b` of type INT. Then the physical layout of the column
545546
/// is:
@@ -920,17 +921,12 @@ fn typed_value_to_variant<'a>(
920921
panic!("Invalid variant, conflicting value and typed_value");
921922
}
922923
match data_type {
924+
DataType::Null => Variant::Null,
923925
DataType::Boolean => {
924926
let boolean_array = typed_value.as_boolean();
925927
let value = boolean_array.value(index);
926928
Variant::from(value)
927929
}
928-
DataType::Date32 => {
929-
let array = typed_value.as_primitive::<Date32Type>();
930-
let value = array.value(index);
931-
let date = Date32Type::to_naive_date(value);
932-
Variant::from(date)
933-
}
934930
// 16-byte FixedSizeBinary always corresponds to a UUID; all other sizes are illegal.
935931
DataType::FixedSizeBinary(16) => {
936932
let array = typed_value.as_fixed_size_binary();
@@ -968,6 +964,55 @@ fn typed_value_to_variant<'a>(
968964
DataType::Float64 => {
969965
primitive_conversion_single_value!(Float64Type, typed_value, index)
970966
}
967+
DataType::Decimal32(_, s) => {
968+
generic_conversion_single_value!(
969+
Decimal32Type,
970+
as_primitive,
971+
|v| VariantDecimal4::try_new(v, *s as u8).map_or(Variant::Null, Variant::from),
972+
typed_value,
973+
index
974+
)
975+
}
976+
DataType::Decimal64(_, s) => {
977+
generic_conversion_single_value!(
978+
Decimal64Type,
979+
as_primitive,
980+
|v| VariantDecimal8::try_new(v, *s as u8).map_or(Variant::Null, Variant::from),
981+
typed_value,
982+
index
983+
)
984+
}
985+
DataType::Decimal128(_, s) => {
986+
generic_conversion_single_value!(
987+
Decimal128Type,
988+
as_primitive,
989+
|v| VariantDecimal16::try_new(v, *s as u8).map_or(Variant::Null, Variant::from),
990+
typed_value,
991+
index
992+
)
993+
}
994+
DataType::Date32 => {
995+
generic_conversion_single_value!(
996+
Date32Type,
997+
as_primitive,
998+
Date32Type::to_naive_date,
999+
typed_value,
1000+
index
1001+
)
1002+
}
1003+
DataType::Time64(TimeUnit::Microsecond) => {
1004+
generic_conversion_single_value!(
1005+
Time64MicrosecondType,
1006+
as_primitive,
1007+
|v| NaiveTime::from_num_seconds_from_midnight_opt(
1008+
(v / 1_000_000) as u32,
1009+
(v % 1_000_000) as u32 * 1000
1010+
)
1011+
.map_or(Variant::Null, Variant::from),
1012+
typed_value,
1013+
index
1014+
)
1015+
}
9711016
DataType::Timestamp(TimeUnit::Microsecond, Some(_)) => {
9721017
generic_conversion_single_value!(
9731018
TimestampMicrosecondType,

parquet-variant-compute/src/variant_get.rs

Lines changed: 157 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -293,13 +293,17 @@ impl<'a> GetOptions<'a> {
293293

294294
#[cfg(test)]
295295
mod test {
296+
use std::str::FromStr;
297+
use std::sync::Arc;
298+
296299
use super::{GetOptions, variant_get};
297300
use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder};
298301
use crate::{VariantArray, VariantArrayBuilder, json_to_variant};
299302
use arrow::array::{
300303
Array, ArrayRef, AsArray, BinaryViewArray, BooleanArray, Date32Array, Decimal32Array,
301304
Decimal64Array, Decimal128Array, Decimal256Array, Float32Array, Float64Array, Int8Array,
302-
Int16Array, Int32Array, Int64Array, StringArray, StructArray,
305+
Int16Array, Int32Array, Int64Array, NullBuilder, StringArray, StructArray,
306+
Time64MicrosecondArray,
303307
};
304308
use arrow::buffer::NullBuffer;
305309
use arrow::compute::CastOptions;
@@ -312,7 +316,6 @@ mod test {
312316
EMPTY_VARIANT_METADATA_BYTES, Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16,
313317
VariantDecimalType, VariantPath,
314318
};
315-
use std::sync::Arc;
316319

317320
fn single_variant_get_test(input_json: &str, path: VariantPath, expected_json: &str) {
318321
// Create input array from JSON string
@@ -969,6 +972,158 @@ mod test {
969972
Date32Array::from(vec![Some(-12345), Some(17586), Some(20000)])
970973
);
971974

975+
perfectly_shredded_variant_array_fn!(perfectly_shredded_time_variant_array, || {
976+
Time64MicrosecondArray::from(vec![Some(12345000), Some(87654000), Some(135792000)])
977+
});
978+
979+
perfectly_shredded_to_arrow_primitive_test!(
980+
get_variant_perfectly_shredded_time_as_time,
981+
DataType::Time64(TimeUnit::Microsecond),
982+
perfectly_shredded_time_variant_array,
983+
Time64MicrosecondArray::from(vec![Some(12345000), Some(87654000), Some(135792000)])
984+
);
985+
986+
perfectly_shredded_variant_array_fn!(perfectly_shredded_null_variant_array, || {
987+
let mut builder = NullBuilder::new();
988+
builder.append_nulls(3);
989+
builder.finish()
990+
});
991+
992+
perfectly_shredded_to_arrow_primitive_test!(
993+
get_variant_perfectly_shredded_null_as_null,
994+
DataType::Null,
995+
perfectly_shredded_null_variant_array,
996+
arrow::array::NullArray::new(3)
997+
);
998+
999+
perfectly_shredded_variant_array_fn!(perfectly_shredded_decimal4_variant_array, || {
1000+
Decimal32Array::from(vec![Some(12345), Some(23400), Some(-12342)])
1001+
.with_precision_and_scale(5, 2)
1002+
.unwrap()
1003+
});
1004+
1005+
perfectly_shredded_to_arrow_primitive_test!(
1006+
get_variant_perfectly_shredded_decimal4_as_decimal4,
1007+
DataType::Decimal32(5, 2),
1008+
perfectly_shredded_decimal4_variant_array,
1009+
Decimal32Array::from(vec![Some(12345), Some(23400), Some(-12342)])
1010+
.with_precision_and_scale(5, 2)
1011+
.unwrap()
1012+
);
1013+
1014+
perfectly_shredded_variant_array_fn!(
1015+
perfectly_shredded_decimal8_variant_array_cast2decimal32,
1016+
|| {
1017+
Decimal64Array::from(vec![Some(123456), Some(145678), Some(-123456)])
1018+
.with_precision_and_scale(6, 1)
1019+
.unwrap()
1020+
}
1021+
);
1022+
1023+
// The input will be cast to Decimal32 when transformed to Variant
1024+
// This test covers the path DataType::Decimal64(the original array)
1025+
// -> Variant::Decimal4(VariantArray) -> DataType::Decimal64(the result array)
1026+
perfectly_shredded_to_arrow_primitive_test!(
1027+
get_variant_perfectly_shredded_decimal8_through_decimal32_as_decimal8,
1028+
DataType::Decimal64(6, 1),
1029+
perfectly_shredded_decimal8_variant_array_cast2decimal32,
1030+
Decimal64Array::from(vec![Some(123456), Some(145678), Some(-123456)])
1031+
.with_precision_and_scale(6, 1)
1032+
.unwrap()
1033+
);
1034+
1035+
// This test covers the path DataType::Decimal64(the original array)
1036+
// -> Variant::Decimal8(VariantArray) -> DataType::Decimal64(the result array)
1037+
perfectly_shredded_variant_array_fn!(perfectly_shredded_decimal8_variant_array, || {
1038+
Decimal64Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
1039+
.with_precision_and_scale(10, 1)
1040+
.unwrap()
1041+
});
1042+
1043+
perfectly_shredded_to_arrow_primitive_test!(
1044+
get_variant_perfectly_shredded_decimal8_as_decimal8,
1045+
DataType::Decimal64(10, 1),
1046+
perfectly_shredded_decimal8_variant_array,
1047+
Decimal64Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
1048+
.with_precision_and_scale(10, 1)
1049+
.unwrap()
1050+
);
1051+
1052+
// This test covers the path DataType::Decimal128(the original array)
1053+
// -> Variant::Decimal4(VariantArray) -> DataType::Decimal128(the result array)
1054+
perfectly_shredded_variant_array_fn!(
1055+
perfectly_shredded_decimal16_within_decimal4_variant_array,
1056+
|| {
1057+
Decimal128Array::from(vec![
1058+
Some(i128::from(1234589)),
1059+
Some(i128::from(2344444)),
1060+
Some(i128::from(-1234789)),
1061+
])
1062+
.with_precision_and_scale(7, 3)
1063+
.unwrap()
1064+
}
1065+
);
1066+
1067+
// This test covers the path DataType::Decimal128(the original array)
1068+
// -> Variant::Decimal4(VariantArray) -> DataType::Decimal128(the result array)
1069+
perfectly_shredded_to_arrow_primitive_test!(
1070+
get_variant_perfectly_shredded_decimal16_within_decimal4_as_decimal16,
1071+
DataType::Decimal128(7, 3),
1072+
perfectly_shredded_decimal16_within_decimal4_variant_array,
1073+
Decimal128Array::from(vec![
1074+
Some(i128::from(1234589)),
1075+
Some(i128::from(2344444)),
1076+
Some(i128::from(-1234789)),
1077+
])
1078+
.with_precision_and_scale(7, 3)
1079+
.unwrap()
1080+
);
1081+
1082+
perfectly_shredded_variant_array_fn!(
1083+
perfectly_shredded_decimal16_within_decimal8_variant_array,
1084+
|| {
1085+
Decimal128Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
1086+
.with_precision_and_scale(10, 1)
1087+
.unwrap()
1088+
}
1089+
);
1090+
1091+
// This test covers the path DataType::Decimal128(the original array)
1092+
// -> Variant::Decimal8(VariantArray) -> DataType::Decimal128(the result array)
1093+
perfectly_shredded_to_arrow_primitive_test!(
1094+
get_variant_perfectly_shredded_decimal16_within8_as_decimal16,
1095+
DataType::Decimal128(10, 1),
1096+
perfectly_shredded_decimal16_within_decimal8_variant_array,
1097+
Decimal128Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
1098+
.with_precision_and_scale(10, 1)
1099+
.unwrap()
1100+
);
1101+
1102+
perfectly_shredded_variant_array_fn!(perfectly_shredded_decimal16_variant_array, || {
1103+
Decimal128Array::from(vec![
1104+
Some(i128::from_str("12345678901234567899").unwrap()),
1105+
Some(i128::from_str("23445677483748324300").unwrap()),
1106+
Some(i128::from_str("-12345678901234567899").unwrap()),
1107+
])
1108+
.with_precision_and_scale(20, 3)
1109+
.unwrap()
1110+
});
1111+
1112+
// This test covers the path DataType::Decimal128(the original array)
1113+
// -> Variant::Decimal16(VariantArray) -> DataType::Decimal128(the result array)
1114+
perfectly_shredded_to_arrow_primitive_test!(
1115+
get_variant_perfectly_shredded_decimal16_as_decimal16,
1116+
DataType::Decimal128(20, 3),
1117+
perfectly_shredded_decimal16_variant_array,
1118+
Decimal128Array::from(vec![
1119+
Some(i128::from_str("12345678901234567899").unwrap()),
1120+
Some(i128::from_str("23445677483748324300").unwrap()),
1121+
Some(i128::from_str("-12345678901234567899").unwrap())
1122+
])
1123+
.with_precision_and_scale(20, 3)
1124+
.unwrap()
1125+
);
1126+
9721127
macro_rules! assert_variant_get_as_variant_array_with_default_option {
9731128
($variant_array: expr, $array_expected: expr) => {{
9741129
let options = GetOptions::new();

0 commit comments

Comments
 (0)