-
Notifications
You must be signed in to change notification settings - Fork 838
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add timezone abstraction #2909
Add timezone abstraction #2909
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1470,23 +1470,6 @@ mod tests { | |
assert_eq!(array1, array2); | ||
} | ||
|
||
#[cfg(feature = "chrono-tz")] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test is moved to timezone.rs with some tweaks |
||
// Checks `hour` on a timestamp array before and after a timezone is attached to it. | ||
#[test] | ||
fn test_with_timezone() { | ||
use crate::compute::hour; | ||
// 37_800_000_000 us is 10h30m00s and 86_339_000_000 us is 23h58m59s. | ||
let a: TimestampMicrosecondArray = vec![37800000000, 86339000000].into(); | ||
|
||
// Without a timezone the hours of the raw values are reported. | ||
let b = hour(&a).unwrap(); | ||
assert_eq!(10, b.value(0)); | ||
assert_eq!(23, b.value(1)); | ||
|
||
let a = a.with_timezone(String::from("America/Los_Angeles")); | ||
|
||
// With the timezone attached the expected hours are 8 hours earlier than above. | ||
let b = hour(&a).unwrap(); | ||
assert_eq!(2, b.value(0)); | ||
assert_eq!(15, b.value(1)); | ||
} | ||
|
||
#[test] | ||
#[should_panic( | ||
expected = "Trying to access an element at index 4 from a PrimitiveArray of length 3" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,9 +17,10 @@ | |
|
||
//! Conversion methods for dates and times. | ||
|
||
use crate::timezone::Tz; | ||
use crate::ArrowPrimitiveType; | ||
use arrow_schema::{DataType, TimeUnit}; | ||
use chrono::{Duration, NaiveDate, NaiveDateTime, NaiveTime}; | ||
use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc}; | ||
|
||
/// Number of seconds in a day | ||
pub const SECONDS_IN_DAY: i64 = 86_400; | ||
|
@@ -187,6 +188,15 @@ pub fn as_datetime<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveDateTime> { | |
} | ||
} | ||
|
||
/// Converts an [`ArrowPrimitiveType`] to [`DateTime<Tz>`] | ||
pub fn as_datetime_with_timezone<T: ArrowPrimitiveType>( | ||
v: i64, | ||
tz: Tz, | ||
) -> Option<DateTime<Tz>> { | ||
let naive = as_datetime::<T>(v)?; | ||
Some(Utc.from_utc_datetime(&naive).with_timezone(&tz)) | ||
} | ||
|
||
Comment on lines
+191
to
+199
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this could enable displaying Timestamp with timezone 👍 |
||
/// Converts an [`ArrowPrimitiveType`] to [`NaiveDate`] | ||
pub fn as_date<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveDate> { | ||
as_datetime::<T>(v).map(|datetime| datetime.date()) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,325 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
//! Timezone for timestamp arrays | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it is worth documenting somewhere what enabling the `chrono-tz` feature does. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It isn't a new feature and is already documented |
||
|
||
use arrow_schema::ArrowError; | ||
use chrono::format::{parse, Parsed, StrftimeItems}; | ||
use chrono::FixedOffset; | ||
pub use private::{Tz, TzOffset}; | ||
|
||
/// Parses a fixed offset of the form "+09:00" | ||
/// | ||
/// # Errors | ||
/// | ||
/// Returns [`ArrowError::ParseError`] when `tz` is not exactly six bytes long, or when | ||
/// chrono cannot parse it with the `%:z` format item. | ||
fn parse_fixed_offset(tz: &str) -> Result<FixedOffset, ArrowError> { | ||
// `len` counts bytes: only six-byte inputs such as "+09:00" get past this guard, | ||
// so "+0900", "+09" and "+9:00" are rejected before chrono is consulted. | ||
if tz.len() != 6 { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure why we are so strict on this, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, there was just an explicit test that +0930 was not accepted, which made me think the lack of support was intentional There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just submitted a ticket, i can implement this after the pr merged |
||
return Err(ArrowError::ParseError(format!( | ||
"Invalid timezone \"{}\": Expected format [+-]XX:XX", | ||
tz | ||
))); | ||
} | ||
|
||
// Let chrono parse the offset into `parsed`, then read it back as a `FixedOffset`; | ||
// a failure in either step is reported as a `ParseError` naming the input. | ||
let mut parsed = Parsed::new(); | ||
parse(&mut parsed, tz, StrftimeItems::new("%:z")) | ||
.and_then(|_| parsed.to_fixed_offset()) | ||
.map_err(|e| { | ||
ArrowError::ParseError(format!("Invalid timezone \"{}\": {}", tz, e)) | ||
}) | ||
} | ||
|
||
#[cfg(feature = "chrono-tz")] | ||
mod private { | ||
use super::*; | ||
use chrono::offset::TimeZone; | ||
use chrono::{LocalResult, NaiveDate, NaiveDateTime, Offset}; | ||
use std::str::FromStr; | ||
|
||
/// An [`Offset`] for [`Tz`] | ||
/// | ||
/// Pairs the resolved UTC offset with the [`Tz`] it was computed from. | ||
#[derive(Debug, Copy, Clone)] | ||
pub struct TzOffset { | ||
// The timezone that produced this offset; handed back by `Tz::from_offset`. | ||
tz: Tz, | ||
// The concrete offset, returned by `fix` and used for `Display`. | ||
offset: FixedOffset, | ||
} | ||
|
||
// Formats exactly like the wrapped `FixedOffset`. | ||
impl std::fmt::Display for TzOffset { | ||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
self.offset.fmt(f) | ||
} | ||
} | ||
|
||
impl Offset for TzOffset { | ||
// Returns the stored `FixedOffset` unchanged. | ||
fn fix(&self) -> FixedOffset { | ||
self.offset | ||
} | ||
} | ||
|
||
/// An Arrow [`TimeZone`] | ||
/// | ||
/// Opaque wrapper around `TzInner`, which holds either a named `chrono_tz` timezone | ||
/// or a fixed offset. Values are obtained through the [`FromStr`] implementation. | ||
#[derive(Debug, Copy, Clone)] | ||
pub struct Tz(TzInner); | ||
|
||
// The two timezone representations available when the `chrono-tz` feature is enabled. | ||
#[derive(Debug, Copy, Clone)] | ||
enum TzInner { | ||
// A named timezone parsed by `chrono_tz`, e.g. "America/Los_Angeles". | ||
Timezone(chrono_tz::Tz), | ||
// A fixed offset parsed from a string starting with '+' or '-'. | ||
Offset(FixedOffset), | ||
} | ||
|
||
// Parses either a fixed offset ("+09:00") or a `chrono_tz` timezone name. | ||
impl FromStr for Tz { | ||
type Err = ArrowError; | ||
|
||
fn from_str(tz: &str) -> Result<Self, Self::Err> { | ||
// A leading sign selects the fixed-offset parser; anything else is a timezone name. | ||
let is_offset = matches!(tz.as_bytes().first(), Some(b'+') | Some(b'-')); | ||
let inner = if is_offset { | ||
TzInner::Offset(parse_fixed_offset(tz)?) | ||
} else { | ||
let named = tz.parse::<chrono_tz::Tz>().map_err(|e| { | ||
ArrowError::ParseError(format!("Invalid timezone \"{}\": {}", tz, e)) | ||
})?; | ||
TzInner::Timezone(named) | ||
}; | ||
Ok(Self(inner)) | ||
} | ||
} | ||
|
||
// Binds the value held by either `TzInner` variant of `$s` to `$tz` and evaluates `$b`. | ||
// The block is expanded once per arm, so it is compiled separately against | ||
// `chrono_tz::Tz` and against `FixedOffset`. | ||
macro_rules! tz { | ||
($s:ident, $tz:ident, $b:block) => { | ||
match $s.0 { | ||
TzInner::Timezone($tz) => $b, | ||
TzInner::Offset($tz) => $b, | ||
} | ||
}; | ||
} | ||
|
||
// Delegates every lookup to the wrapped timezone via `tz!`, then wraps the answer in a | ||
// `TzOffset` that records both this `Tz` and the resolved `FixedOffset`. | ||
impl TimeZone for Tz { | ||
type Offset = TzOffset; | ||
|
||
// Recovers the `Tz` stored in the offset when it was created. | ||
fn from_offset(offset: &Self::Offset) -> Self { | ||
offset.tz | ||
} | ||
|
||
// Maps each offset of the inner `LocalResult` into a `TzOffset`. | ||
fn offset_from_local_date(&self, local: &NaiveDate) -> LocalResult<Self::Offset> { | ||
tz!(self, tz, { | ||
tz.offset_from_local_date(local).map(|x| TzOffset { | ||
tz: *self, | ||
offset: x.fix(), | ||
}) | ||
}) | ||
} | ||
|
||
// Same as `offset_from_local_date`, for a local date and time. | ||
fn offset_from_local_datetime( | ||
&self, | ||
local: &NaiveDateTime, | ||
) -> LocalResult<Self::Offset> { | ||
tz!(self, tz, { | ||
tz.offset_from_local_datetime(local).map(|x| TzOffset { | ||
tz: *self, | ||
offset: x.fix(), | ||
}) | ||
}) | ||
} | ||
|
||
// Resolves the offset for a UTC date with the inner timezone. | ||
fn offset_from_utc_date(&self, utc: &NaiveDate) -> Self::Offset { | ||
tz!(self, tz, { | ||
TzOffset { | ||
tz: *self, | ||
offset: tz.offset_from_utc_date(utc).fix(), | ||
} | ||
}) | ||
} | ||
|
||
// Resolves the offset for a UTC date and time with the inner timezone. | ||
fn offset_from_utc_datetime(&self, utc: &NaiveDateTime) -> Self::Offset { | ||
tz!(self, tz, { | ||
TzOffset { | ||
tz: *self, | ||
offset: tz.offset_from_utc_datetime(utc).fix(), | ||
} | ||
}) | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use super::*; | ||
use chrono::{Timelike, Utc}; | ||
|
||
// Converting UTC instants into a named `Tz` must shift the reported hour. | ||
#[test] | ||
fn test_with_timezone() { | ||
// 37_800_000 ms is 10h30m00s and 86_339_000 ms is 23h58m59s. | ||
let vals = [ | ||
Utc.timestamp_millis(37800000), | ||
Utc.timestamp_millis(86339000), | ||
]; | ||
|
||
assert_eq!(10, vals[0].hour()); | ||
assert_eq!(23, vals[1].hour()); | ||
|
||
let tz: Tz = "America/Los_Angeles".parse().unwrap(); | ||
|
||
// The expected hours are 8 hours earlier than the UTC hours above. | ||
assert_eq!(2, vals[0].with_timezone(&tz).hour()); | ||
assert_eq!(15, vals[1].with_timezone(&tz).hour()); | ||
} | ||
|
||
// Checks the offsets `Tz` reports for UTC instants on either side of both 2021 | ||
// daylight-saving transitions in Sydney. | ||
#[test] | ||
fn test_using_chrono_tz_and_utc_naive_date_time() { | ||
let tz: Tz = "Australia/Sydney".parse().unwrap(); | ||
let sydney_offset_without_dst = FixedOffset::east(10 * 60 * 60); | ||
let sydney_offset_with_dst = FixedOffset::east(11 * 60 * 60); | ||
// Daylight savings ends | ||
// When local daylight time was about to reach | ||
// Sunday, 4 April 2021, 3:00:00 am clocks were turned backward 1 hour to | ||
// Sunday, 4 April 2021, 2:00:00 am local standard time instead. | ||
|
||
// Daylight savings starts | ||
// When local standard time was about to reach | ||
// Sunday, 3 October 2021, 2:00:00 am clocks were turned forward 1 hour to | ||
// Sunday, 3 October 2021, 3:00:00 am local daylight time instead. | ||
|
||
// Sydney 2021-04-04T02:30:00+11:00 is 2021-04-03T15:30:00Z | ||
let utc_just_before_sydney_dst_ends = | ||
NaiveDate::from_ymd(2021, 4, 3).and_hms_nano(15, 30, 0, 0); | ||
assert_eq!( | ||
tz.offset_from_utc_datetime(&utc_just_before_sydney_dst_ends) | ||
.fix(), | ||
sydney_offset_with_dst | ||
); | ||
// Sydney 2021-04-04T02:30:00+10:00 is 2021-04-03T16:30:00Z | ||
let utc_just_after_sydney_dst_ends = | ||
NaiveDate::from_ymd(2021, 4, 3).and_hms_nano(16, 30, 0, 0); | ||
assert_eq!( | ||
tz.offset_from_utc_datetime(&utc_just_after_sydney_dst_ends) | ||
.fix(), | ||
sydney_offset_without_dst | ||
); | ||
// Sydney 2021-10-03T01:30:00+10:00 is 2021-10-02T15:30:00Z | ||
let utc_just_before_sydney_dst_starts = | ||
NaiveDate::from_ymd(2021, 10, 2).and_hms_nano(15, 30, 0, 0); | ||
assert_eq!( | ||
tz.offset_from_utc_datetime(&utc_just_before_sydney_dst_starts) | ||
.fix(), | ||
sydney_offset_without_dst | ||
); | ||
// Sydney 2021-10-03T03:30:00+11:00 is 2021-10-02T16:30:00Z | ||
// (same year as the other cases, so this instant sits right after the transition) | ||
let utc_just_after_sydney_dst_starts = | ||
NaiveDate::from_ymd(2021, 10, 2).and_hms_nano(16, 30, 0, 0); | ||
assert_eq!( | ||
tz.offset_from_utc_datetime(&utc_just_after_sydney_dst_starts) | ||
.fix(), | ||
sydney_offset_with_dst | ||
); | ||
} | ||
} | ||
} | ||
|
||
#[cfg(not(feature = "chrono-tz"))] | ||
mod private { | ||
use super::*; | ||
use chrono::offset::TimeZone; | ||
use chrono::{FixedOffset, LocalResult, NaiveDate, NaiveDateTime, Offset}; | ||
use std::str::FromStr; | ||
|
||
/// An [`Offset`] for [`Tz`] | ||
/// | ||
/// Thin wrapper around a [`FixedOffset`], used when the `chrono-tz` feature is disabled. | ||
#[derive(Debug, Copy, Clone)] | ||
pub struct TzOffset(FixedOffset); | ||
|
||
// Formats exactly like the wrapped `FixedOffset`. | ||
impl std::fmt::Display for TzOffset { | ||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
self.0.fmt(f) | ||
} | ||
} | ||
|
||
impl Offset for TzOffset { | ||
// Returns the wrapped `FixedOffset` unchanged. | ||
fn fix(&self) -> FixedOffset { | ||
self.0 | ||
} | ||
} | ||
|
||
/// An Arrow [`TimeZone`] | ||
/// | ||
/// Without the `chrono-tz` feature this only wraps a [`FixedOffset`]. | ||
#[derive(Debug, Copy, Clone)] | ||
pub struct Tz(FixedOffset); | ||
|
||
// Parses a fixed offset such as "+09:00"; timezone names are rejected in this build. | ||
impl FromStr for Tz { | ||
type Err = ArrowError; | ||
|
||
fn from_str(tz: &str) -> Result<Self, Self::Err> { | ||
match tz.as_bytes().first() { | ||
// A leading sign marks an offset string. | ||
Some(b'+') | Some(b'-') => parse_fixed_offset(tz).map(Self), | ||
_ => Err(ArrowError::ParseError(format!( | ||
"Invalid timezone \"{}\": only offset based timezones supported without chrono-tz feature", | ||
tz | ||
))), | ||
} | ||
} | ||
} | ||
|
||
// Forwards every lookup to the wrapped `FixedOffset` and wraps the result in `TzOffset`. | ||
impl TimeZone for Tz { | ||
type Offset = TzOffset; | ||
|
||
// Rebuilds the timezone from the `FixedOffset` stored in the offset. | ||
fn from_offset(offset: &Self::Offset) -> Self { | ||
Self(offset.0) | ||
} | ||
|
||
// Maps each offset of the inner `LocalResult` into a `TzOffset`. | ||
fn offset_from_local_date(&self, local: &NaiveDate) -> LocalResult<Self::Offset> { | ||
self.0.offset_from_local_date(local).map(TzOffset) | ||
} | ||
|
||
// Same as `offset_from_local_date`, for a local date and time. | ||
fn offset_from_local_datetime( | ||
&self, | ||
local: &NaiveDateTime, | ||
) -> LocalResult<Self::Offset> { | ||
self.0.offset_from_local_datetime(local).map(TzOffset) | ||
} | ||
|
||
// Resolves the offset for a UTC date with the wrapped `FixedOffset`. | ||
fn offset_from_utc_date(&self, utc: &NaiveDate) -> Self::Offset { | ||
TzOffset(self.0.offset_from_utc_date(utc).fix()) | ||
} | ||
|
||
// Resolves the offset for a UTC date and time with the wrapped `FixedOffset`. | ||
fn offset_from_utc_datetime(&self, utc: &NaiveDateTime) -> Self::Offset { | ||
TzOffset(self.0.offset_from_utc_datetime(utc).fix()) | ||
} | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use super::*; | ||
use chrono::{NaiveDate, Offset, TimeZone}; | ||
|
||
// Offset strings of the form [+-]XX:XX must parse to the matching number of seconds; | ||
// other spellings must be rejected with an "Invalid timezone" error. | ||
#[test] | ||
fn test_with_offset() { | ||
let t = NaiveDate::from_ymd(2000, 1, 1); | ||
|
||
// Both signed spellings of zero yield a zero offset. | ||
let tz: Tz = "-00:00".parse().unwrap(); | ||
assert_eq!(tz.offset_from_utc_date(&t).fix().local_minus_utc(), 0); | ||
let tz: Tz = "+00:00".parse().unwrap(); | ||
assert_eq!(tz.offset_from_utc_date(&t).fix().local_minus_utc(), 0); | ||
|
||
let tz: Tz = "-10:00".parse().unwrap(); | ||
assert_eq!( | ||
tz.offset_from_utc_date(&t).fix().local_minus_utc(), | ||
-10 * 60 * 60 | ||
); | ||
let tz: Tz = "+09:00".parse().unwrap(); | ||
assert_eq!( | ||
tz.offset_from_utc_date(&t).fix().local_minus_utc(), | ||
9 * 60 * 60 | ||
); | ||
|
||
// None of the inputs below is six bytes long, so `parse_fixed_offset` rejects them. | ||
let err = "+9:00".parse::<Tz>().unwrap_err().to_string(); | ||
assert!(err.contains("Invalid timezone"), "{}", err); | ||
|
||
let err = "+09".parse::<Tz>().unwrap_err().to_string(); | ||
assert!(err.contains("Invalid timezone"), "{}", err); | ||
|
||
let err = "+0900".parse::<Tz>().unwrap_err().to_string(); | ||
assert!(err.contains("Invalid timezone"), "{}", err); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I verified we added an equivalent feature to arrow/Cargo.toml so people can activate this feature from arrow