From f8eee2247d3d7a0d993ab38d1f93447a4622dd82 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 24 Jun 2022 11:11:39 +0100 Subject: [PATCH] Set adjusted to UTC if UTC timezone (#1932) --- parquet/src/arrow/schema.rs | 39 +++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/parquet/src/arrow/schema.rs b/parquet/src/arrow/schema.rs index f3d0a3d9b36b..71ae11d089d0 100644 --- a/parquet/src/arrow/schema.rs +++ b/parquet/src/arrow/schema.rs @@ -300,10 +300,15 @@ fn arrow_to_parquet_type(field: &Field) -> Result { .with_repetition(repetition) .build() } - DataType::Timestamp(time_unit, _) => { + DataType::Timestamp(time_unit, tz) => { + let is_utc = tz + .as_ref() + .map(|tz| tz == "UTC" || tz == "+00:00" || tz == "-00:00") + .unwrap_or(false); + Type::primitive_type_builder(name, PhysicalType::INT64) .with_logical_type(Some(LogicalType::Timestamp { - is_adjusted_to_u_t_c: false, + is_adjusted_to_u_t_c: is_utc, unit: match time_unit { TimeUnit::Second => unreachable!(), TimeUnit::Millisecond => { @@ -1281,6 +1286,11 @@ mod tests { OPTIONAL INT64 time_micro (TIME_MICROS); OPTIONAL INT64 ts_milli (TIMESTAMP_MILLIS); REQUIRED INT64 ts_micro (TIMESTAMP(MICROS,false)); + REQUIRED INT64 ts_seconds; + REQUIRED INT64 ts_micro_utc (TIMESTAMP(MICROS, true)); + REQUIRED INT64 ts_millis_zero_offset (TIMESTAMP(MILLIS, true)); + REQUIRED INT64 ts_millis_zero_negative_offset (TIMESTAMP(MILLIS, true)); + REQUIRED INT64 ts_micro_non_utc (TIMESTAMP(MICROS, false)); REQUIRED GROUP struct { REQUIRED BOOLEAN bools; REQUIRED INT32 uint32 (INTEGER(32,false)); @@ -1329,6 +1339,31 @@ mod tests { DataType::Timestamp(TimeUnit::Microsecond, None), false, ), + Field::new( + "ts_seconds", + DataType::Timestamp(TimeUnit::Second, Some("UTC".to_string())), + false, + ), + Field::new( + "ts_micro_utc", + DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".to_string())), + false, + ), + Field::new( + "ts_millis_zero_offset", + DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".to_string())), + false, + ), + Field::new( + "ts_millis_zero_negative_offset", + DataType::Timestamp(TimeUnit::Millisecond, Some("-00:00".to_string())), + false, + ), + Field::new( + "ts_micro_non_utc", + DataType::Timestamp(TimeUnit::Microsecond, Some("+01:00".to_string())), + false, + ), Field::new( "struct", DataType::Struct(vec![