From 90887c25d9e6159cde94f67ded2f7be308c7f84d Mon Sep 17 00:00:00 2001 From: Vasanthakumar Vijayasekaran Date: Sun, 26 Sep 2021 20:58:09 +0530 Subject: [PATCH] Extract parts of datetime (#433) --- src/compute/temporal.rs | 516 ++++++++++++++++++++--------------- tests/it/compute/temporal.rs | 425 ++++++++++++++++++++--------- 2 files changed, 589 insertions(+), 352 deletions(-) diff --git a/src/compute/temporal.rs b/src/compute/temporal.rs index b098c890778..6ed750b65fc 100644 --- a/src/compute/temporal.rs +++ b/src/compute/temporal.rs @@ -28,6 +28,251 @@ use crate::types::NaturalDataType; use super::arity::unary; +// Create and implement a trait that converts chrono's `Weekday` +// type into `u32` +trait U32Weekday: Datelike { + fn u32_weekday(&self) -> u32 { + self.weekday().number_from_monday() + } +} + +impl U32Weekday for chrono::NaiveDateTime {} +impl U32Weekday for chrono::DateTime {} + +// Create and implement a trait that converts chrono's `IsoWeek` +// type into `u32` +trait U32IsoWeek: Datelike { + fn u32_iso_week(&self) -> u32 { + self.iso_week().week() + } +} + +impl U32IsoWeek for chrono::NaiveDateTime {} +impl U32IsoWeek for chrono::DateTime {} + +// Macro to avoid repetition in functions, that apply +// `chrono::Datelike` methods on Arrays +macro_rules! 
date_like { + ($extract:ident, $array:ident, $data_type:path) => { + match $array.data_type().to_logical_type() { + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { + date_variants($array, $data_type, |x| x.$extract()) + } + DataType::Timestamp(time_unit, Some(timezone_str)) => { + let array = $array.as_any().downcast_ref().unwrap(); + + if let Ok(timezone) = parse_offset(timezone_str) { + Ok(extract_impl(array, *time_unit, timezone, |x| x.$extract())) + } else { + chrono_tz(array, *time_unit, timezone_str, |x| x.$extract()) + } + } + dt => Err(ArrowError::NotYetImplemented(format!( + "\"{}\" does not support type {:?}", + stringify!($extract), + dt + ))), + } + }; +} + +/// Extracts the years of a temporal array as [`PrimitiveArray`]. +/// Use [`can_year`] to check if this operation is supported for the target [`DataType`]. +pub fn year(array: &dyn Array) -> Result> { + date_like!(year, array, DataType::Int32) +} + +/// Extracts the months of a temporal array as [`PrimitiveArray`]. +/// Value ranges from 1 to 12. +/// Use [`can_month`] to check if this operation is supported for the target [`DataType`]. +pub fn month(array: &dyn Array) -> Result> { + date_like!(month, array, DataType::UInt32) +} + +/// Extracts the days of a temporal array as [`PrimitiveArray`]. +/// Value ranges from 1 to 31 (Last day depends on month). +/// Use [`can_day`] to check if this operation is supported for the target [`DataType`]. +pub fn day(array: &dyn Array) -> Result> { + date_like!(day, array, DataType::UInt32) +} + +/// Extracts weekday of a temporal array as [`PrimitiveArray`]. +/// Monday is 1, Tuesday is 2, ..., Sunday is 7. +/// Use [`can_weekday`] to check if this operation is supported for the target [`DataType`] +pub fn weekday(array: &dyn Array) -> Result> { + date_like!(u32_weekday, array, DataType::UInt32) +} + +/// Extracts ISO week of a temporal array as [`PrimitiveArray`] +/// Value ranges from 1 to 53 (Last week depends on the year). 
+/// Use [`can_iso_week`] to check if this operation is supported for the target [`DataType`] +pub fn iso_week(array: &dyn Array) -> Result> { + date_like!(u32_iso_week, array, DataType::UInt32) +} + +// Macro to avoid repetition in functions, that apply +// `chrono::Timelike` methods on Arrays +macro_rules! time_like { + ($extract:ident, $array:ident, $data_type:path) => { + match $array.data_type().to_logical_type() { + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) => { + date_variants($array, $data_type, |x| x.$extract()) + } + DataType::Time32(_) | DataType::Time64(_) => { + time_variants($array, DataType::UInt32, |x| x.$extract()) + } + DataType::Timestamp(time_unit, Some(timezone_str)) => { + let array = $array.as_any().downcast_ref().unwrap(); + + if let Ok(timezone) = parse_offset(timezone_str) { + Ok(extract_impl(array, *time_unit, timezone, |x| x.$extract())) + } else { + chrono_tz(array, *time_unit, timezone_str, |x| x.$extract()) + } + } + dt => Err(ArrowError::NotYetImplemented(format!( + "\"{}\" does not support type {:?}", + stringify!($extract), + dt + ))), + } + }; +} + +/// Extracts the hours of a temporal array as [`PrimitiveArray`]. +/// Value ranges from 0 to 23. +/// Use [`can_hour`] to check if this operation is supported for the target [`DataType`]. +pub fn hour(array: &dyn Array) -> Result> { + time_like!(hour, array, DataType::UInt32) +} + +/// Extracts the minutes of a temporal array as [`PrimitiveArray`]. +/// Value ranges from 0 to 59. +/// Use [`can_minute`] to check if this operation is supported for the target [`DataType`]. +pub fn minute(array: &dyn Array) -> Result> { + time_like!(minute, array, DataType::UInt32) +} + +/// Extracts the seconds of a temporal array as [`PrimitiveArray`]. +/// Value ranges from 0 to 59. +/// Use [`can_second`] to check if this operation is supported for the target [`DataType`]. 
+pub fn second(array: &dyn Array) -> Result> { + time_like!(second, array, DataType::UInt32) +} + +/// Extracts the nanoseconds of a temporal array as [`PrimitiveArray`]. +/// Use [`can_nanosecond`] to check if this operation is supported for the target [`DataType`]. +pub fn nanosecond(array: &dyn Array) -> Result> { + time_like!(nanosecond, array, DataType::UInt32) +} + +fn date_variants(array: &dyn Array, data_type: DataType, op: F) -> Result> +where + O: NativeType, + F: Fn(chrono::NaiveDateTime) -> O, +{ + match array.data_type().to_logical_type() { + DataType::Date32 => { + let array = array + .as_any() + .downcast_ref::>() + .unwrap(); + Ok(unary(array, |x| op(date32_to_datetime(x)), data_type)) + } + DataType::Date64 => { + let array = array + .as_any() + .downcast_ref::>() + .unwrap(); + Ok(unary(array, |x| op(date64_to_datetime(x)), data_type)) + } + DataType::Timestamp(time_unit, None) => { + let array = array + .as_any() + .downcast_ref::>() + .unwrap(); + let func = match time_unit { + TimeUnit::Second => timestamp_s_to_datetime, + TimeUnit::Millisecond => timestamp_ms_to_datetime, + TimeUnit::Microsecond => timestamp_us_to_datetime, + TimeUnit::Nanosecond => timestamp_ns_to_datetime, + }; + Ok(unary(array, |x| op(func(x)), data_type)) + } + _ => unreachable!(), + } +} + +fn time_variants(array: &dyn Array, data_type: DataType, op: F) -> Result> +where + O: NativeType, + F: Fn(chrono::NaiveTime) -> O, +{ + match array.data_type().to_logical_type() { + DataType::Time32(TimeUnit::Second) => { + let array = array + .as_any() + .downcast_ref::>() + .unwrap(); + Ok(unary(array, |x| op(time32s_to_time(x)), data_type)) + } + DataType::Time32(TimeUnit::Millisecond) => { + let array = array + .as_any() + .downcast_ref::>() + .unwrap(); + Ok(unary(array, |x| op(time32ms_to_time(x)), data_type)) + } + DataType::Time64(TimeUnit::Microsecond) => { + let array = array + .as_any() + .downcast_ref::>() + .unwrap(); + Ok(unary(array, |x| op(time64us_to_time(x)), 
data_type)) + } + DataType::Time64(TimeUnit::Nanosecond) => { + let array = array + .as_any() + .downcast_ref::>() + .unwrap(); + Ok(unary(array, |x| op(time64ns_to_time(x)), data_type)) + } + _ => unreachable!(), + } +} + +#[cfg(feature = "chrono-tz")] +fn chrono_tz( + array: &PrimitiveArray, + time_unit: TimeUnit, + timezone_str: &str, + op: F, +) -> Result> +where + O: NativeType, + F: Fn(chrono::DateTime) -> O, +{ + let timezone = parse_offset_tz(timezone_str)?; + Ok(extract_impl(array, time_unit, timezone, op)) +} + +#[cfg(not(feature = "chrono-tz"))] +fn chrono_tz( + _: &PrimitiveArray, + _: TimeUnit, + timezone_str: &str, + _: F, +) -> Result> +where + O: NativeType, + F: Fn(chrono::DateTime) -> O, +{ + Err(ArrowError::InvalidArgumentError(format!( + "timezone \"{}\" cannot be parsed (feature chrono-tz is not active)", + timezone_str + ))) +} + fn extract_impl( array: &PrimitiveArray, time_unit: TimeUnit, @@ -46,7 +291,7 @@ where let offset = timezone.offset_from_utc_datetime(&datetime); extract(chrono::DateTime::::from_utc(datetime, offset)) }; - unary(array, op, DataType::UInt32) + unary(array, op, A::DATA_TYPE) } TimeUnit::Millisecond => { let op = |x| { @@ -75,149 +320,45 @@ where } } -#[cfg(feature = "chrono-tz")] -#[cfg_attr(docsrs, doc(cfg(feature = "chrono-tz")))] -fn chrono_tz_hour( - array: &PrimitiveArray, - time_unit: TimeUnit, - timezone_str: &str, -) -> Result> { - let timezone = parse_offset_tz(timezone_str)?; - Ok(extract_impl(array, time_unit, timezone, |x| x.hour())) +/// Checks if an array of type `datatype` can perform year operation +/// +/// # Examples +/// ``` +/// use arrow2::compute::temporal::can_year; +/// use arrow2::datatypes::{DataType}; +/// +/// assert_eq!(can_year(&DataType::Date32), true); +/// assert_eq!(can_year(&DataType::Int8), false); +/// ``` +pub fn can_year(data_type: &DataType) -> bool { + can_date(data_type) } -#[cfg(not(feature = "chrono-tz"))] -fn chrono_tz_hour( - _: &PrimitiveArray, - _: TimeUnit, - 
timezone_str: &str, -) -> Result> { - Err(ArrowError::InvalidArgumentError(format!( - "timezone \"{}\" cannot be parsed (feature chrono-tz is not active)", - timezone_str - ))) +/// Checks if an array of type `datatype` can perform month operation +pub fn can_month(data_type: &DataType) -> bool { + can_date(data_type) } -#[cfg(feature = "chrono-tz")] -#[cfg_attr(docsrs, doc(cfg(feature = "chrono-tz")))] -fn chrono_tz_year( - array: &PrimitiveArray, - time_unit: TimeUnit, - timezone_str: &str, -) -> Result> { - let timezone = parse_offset_tz(timezone_str)?; - Ok(extract_impl(array, time_unit, timezone, |x| x.year())) +/// Checks if an array of type `datatype` can perform day operation +pub fn can_day(data_type: &DataType) -> bool { + can_date(data_type) } -#[cfg(not(feature = "chrono-tz"))] -fn chrono_tz_year( - _: &PrimitiveArray, - _: TimeUnit, - timezone_str: &str, -) -> Result> { - Err(ArrowError::InvalidArgumentError(format!( - "timezone \"{}\" cannot be parsed (feature chrono-tz is not active)", - timezone_str - ))) +/// Checks if an array of type `data_type` can perform weekday operation +pub fn can_weekday(data_type: &DataType) -> bool { + can_date(data_type) } -/// Extracts the hours of a temporal array as [`PrimitiveArray`]. -/// Use [`can_hour`] to check if this operation is supported for the target [`DataType`]. 
-pub fn hour(array: &dyn Array) -> Result> { - let final_data_type = DataType::UInt32; - match array.data_type() { - DataType::Time32(TimeUnit::Second) => { - let array = array - .as_any() - .downcast_ref::>() - .unwrap(); - Ok(unary(array, |x| time32s_to_time(x).hour(), final_data_type)) - } - DataType::Time32(TimeUnit::Millisecond) => { - let array = array - .as_any() - .downcast_ref::>() - .unwrap(); - Ok(unary( - array, - |x| time32ms_to_time(x).hour(), - final_data_type, - )) - } - DataType::Time64(TimeUnit::Microsecond) => { - let array = array - .as_any() - .downcast_ref::>() - .unwrap(); - Ok(unary( - array, - |x| time64us_to_time(x).hour(), - final_data_type, - )) - } - DataType::Time64(TimeUnit::Nanosecond) => { - let array = array - .as_any() - .downcast_ref::>() - .unwrap(); - Ok(unary( - array, - |x| time64ns_to_time(x).hour(), - final_data_type, - )) - } - DataType::Date32 => { - let array = array - .as_any() - .downcast_ref::>() - .unwrap(); - Ok(unary( - array, - |x| date32_to_datetime(x).hour(), - final_data_type, - )) - } - DataType::Date64 => { - let array = array - .as_any() - .downcast_ref::>() - .unwrap(); - Ok(unary( - array, - |x| date64_to_datetime(x).hour(), - final_data_type, - )) - } - DataType::Timestamp(time_unit, None) => { - let array = array - .as_any() - .downcast_ref::>() - .unwrap(); - let op = match time_unit { - TimeUnit::Second => |x| timestamp_s_to_datetime(x).hour(), - TimeUnit::Millisecond => |x| timestamp_ms_to_datetime(x).hour(), - TimeUnit::Microsecond => |x| timestamp_us_to_datetime(x).hour(), - TimeUnit::Nanosecond => |x| timestamp_ns_to_datetime(x).hour(), - }; - Ok(unary(array, op, final_data_type)) - } - DataType::Timestamp(time_unit, Some(timezone_str)) => { - let time_unit = *time_unit; - let timezone = parse_offset(timezone_str); - - let array = array.as_any().downcast_ref().unwrap(); +/// Checks if an array of type `data_type` can perform ISO week operation +pub fn can_iso_week(data_type: &DataType) -> bool { + 
can_date(data_type) +} - if let Ok(timezone) = timezone { - Ok(extract_impl(array, time_unit, timezone, |x| x.hour())) - } else { - chrono_tz_hour(array, time_unit, timezone_str) - } - } - dt => Err(ArrowError::NotYetImplemented(format!( - "\"hour\" does not support type {:?}", - dt - ))), - } +fn can_date(data_type: &DataType) -> bool { + matches!( + data_type, + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) + ) } /// Checks if an array of type `datatype` can perform hour operation @@ -227,13 +368,29 @@ pub fn hour(array: &dyn Array) -> Result> { /// use arrow2::compute::temporal::can_hour; /// use arrow2::datatypes::{DataType, TimeUnit}; /// -/// let data_type = DataType::Time32(TimeUnit::Second); -/// assert_eq!(can_hour(&data_type), true); - -/// let data_type = DataType::Int8; -/// assert_eq!(can_hour(&data_type), false); +/// assert_eq!(can_hour(&DataType::Time32(TimeUnit::Second)), true); +/// assert_eq!(can_hour(&DataType::Int8), false); /// ``` pub fn can_hour(data_type: &DataType) -> bool { + can_time(data_type) +} + +/// Checks if an array of type `datatype` can perform minute operation +pub fn can_minute(data_type: &DataType) -> bool { + can_time(data_type) +} + +/// Checks if an array of type `datatype` can perform second operation +pub fn can_second(data_type: &DataType) -> bool { + can_time(data_type) +} + +/// Checks if an array of type `datatype` can perform nanosecond operation +pub fn can_nanosecond(data_type: &DataType) -> bool { + can_time(data_type) +} + +fn can_time(data_type: &DataType) -> bool { matches!( data_type, DataType::Time32(TimeUnit::Second) @@ -245,82 +402,3 @@ pub fn can_hour(data_type: &DataType) -> bool { | DataType::Timestamp(_, _) ) } - -/// Extracts the years of a temporal array as [`PrimitiveArray`]. -/// Use [`can_year`] to check if this operation is supported for the target [`DataType`]. 
-pub fn year(array: &dyn Array) -> Result> { - let final_data_type = DataType::Int32; - match array.data_type() { - DataType::Date32 => { - let array = array - .as_any() - .downcast_ref::>() - .unwrap(); - Ok(unary( - array, - |x| date32_to_datetime(x).year(), - final_data_type, - )) - } - DataType::Date64 => { - let array = array - .as_any() - .downcast_ref::>() - .unwrap(); - Ok(unary( - array, - |x| date64_to_datetime(x).year(), - final_data_type, - )) - } - DataType::Timestamp(time_unit, None) => { - let array = array - .as_any() - .downcast_ref::>() - .unwrap(); - let op = match time_unit { - TimeUnit::Second => |x| timestamp_s_to_datetime(x).year(), - TimeUnit::Millisecond => |x| timestamp_ms_to_datetime(x).year(), - TimeUnit::Microsecond => |x| timestamp_us_to_datetime(x).year(), - TimeUnit::Nanosecond => |x| timestamp_ns_to_datetime(x).year(), - }; - Ok(unary(array, op, final_data_type)) - } - DataType::Timestamp(time_unit, Some(timezone_str)) => { - let time_unit = *time_unit; - let timezone = parse_offset(timezone_str); - - let array = array.as_any().downcast_ref().unwrap(); - - if let Ok(timezone) = timezone { - Ok(extract_impl(array, time_unit, timezone, |x| x.year())) - } else { - chrono_tz_year(array, time_unit, timezone_str) - } - } - dt => Err(ArrowError::NotYetImplemented(format!( - "\"year\" does not support type {:?}", - dt - ))), - } -} - -/// Checks if an array of type `datatype` can perform year operation -/// -/// # Examples -/// ``` -/// use arrow2::compute::temporal::can_year; -/// use arrow2::datatypes::{DataType}; -/// -/// let data_type = DataType::Date32; -/// assert_eq!(can_year(&data_type), true); - -/// let data_type = DataType::Int8; -/// assert_eq!(can_year(&data_type), false); -/// ``` -pub fn can_year(data_type: &DataType) -> bool { - matches!( - data_type, - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) - ) -} diff --git a/tests/it/compute/temporal.rs b/tests/it/compute/temporal.rs index 
160d880a91a..748a4dbfe60 100644 --- a/tests/it/compute/temporal.rs +++ b/tests/it/compute/temporal.rs @@ -2,173 +2,332 @@ use arrow2::array::*; use arrow2::compute::temporal::*; use arrow2::datatypes::*; -#[test] -fn date64_hour() { - let array = - Int64Array::from(&[Some(1514764800000), None, Some(1550636625000)]).to(DataType::Date64); +macro_rules! temporal_test { + ($func:ident, $extract:ident, $data_types:path) => { + #[test] + fn $func() { + for data_type in $data_types() { + let data = TestData::data(&data_type); + let result = $extract(&*data.input).unwrap(); - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(0), None, Some(4)]); - assert_eq!(result, expected); + assert_eq!( + result, + data.$extract.unwrap(), + "\"{}\" failed on type: {:?}", + stringify!($extract), + data_type + ); + } + } + }; } -#[test] -fn date32_hour() { - let array = Int32Array::from(&[Some(15147), None, Some(15148)]).to(DataType::Date32); +temporal_test!(temporal_hour, hour, TestData::available_time_like_types); +temporal_test!(temporal_minute, minute, TestData::available_time_like_types); +temporal_test!(temporal_second, second, TestData::available_time_like_types); +temporal_test!( + temporal_nanosecond, + nanosecond, + TestData::available_time_like_types +); +temporal_test!(temporal_year, year, TestData::available_date_like_types); +temporal_test!(temporal_month, month, TestData::available_date_like_types); +temporal_test!(temporal_day, day, TestData::available_date_like_types); +temporal_test!( + temporal_weekday, + weekday, + TestData::available_date_like_types +); +temporal_test!( + temporal_iso_week, + iso_week, + TestData::available_date_like_types +); - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(0), None, Some(0)]); - assert_eq!(result, expected); +struct TestData { + input: Box, + year: Option, + month: Option, + day: Option, + weekday: Option, + iso_week: Option, + hour: Option, + minute: Option, + second: 
Option, + nanosecond: Option, } -#[test] -fn time32_second_hour() { - let array = Int32Array::from(&[Some(37800), None]).to(DataType::Time32(TimeUnit::Second)); +impl TestData { + fn data(data_type: &DataType) -> TestData { + match data_type { + DataType::Date64 => TestData { + input: Box::new( + Int64Array::from(&[Some(1514764800000), None, Some(1550636625000)]) + .to(data_type.clone()), + ), + year: Some(Int32Array::from(&[Some(2018), None, Some(2019)])), + month: Some(UInt32Array::from(&[Some(1), None, Some(2)])), + day: Some(UInt32Array::from(&[Some(1), None, Some(20)])), + weekday: Some(UInt32Array::from(&[Some(1), None, Some(3)])), + iso_week: Some(UInt32Array::from(&[Some(1), None, Some(8)])), + hour: Some(UInt32Array::from(&[Some(0), None, Some(4)])), + minute: Some(UInt32Array::from(&[Some(0), None, Some(23)])), + second: Some(UInt32Array::from(&[Some(0), None, Some(45)])), + nanosecond: Some(UInt32Array::from(&[Some(0), None, Some(0)])), + }, + DataType::Date32 => TestData { + input: Box::new(Int32Array::from(&[Some(15147), None]).to(data_type.clone())), + year: Some(Int32Array::from(&[Some(2011), None])), + month: Some(UInt32Array::from(&[Some(6), None])), + day: Some(UInt32Array::from(&[Some(22), None])), + weekday: Some(UInt32Array::from(&[Some(3), None])), + iso_week: Some(UInt32Array::from(&[Some(25), None])), + hour: Some(UInt32Array::from(&[Some(0), None])), + minute: Some(UInt32Array::from(&[Some(0), None])), + second: Some(UInt32Array::from(&[Some(0), None])), + nanosecond: Some(UInt32Array::from(&[Some(0), None])), + }, + DataType::Time32(TimeUnit::Second) => TestData { + input: Box::new(Int32Array::from(&[Some(37800), None]).to(data_type.clone())), + year: None, + month: None, + day: None, + weekday: None, + iso_week: None, + hour: Some(UInt32Array::from(&[Some(10), None])), + minute: Some(UInt32Array::from(&[Some(30), None])), + second: Some(UInt32Array::from(&[Some(0), None])), + nanosecond: Some(UInt32Array::from(&[Some(0), None])), + }, + 
DataType::Time64(TimeUnit::Microsecond) => TestData { + input: Box::new(Int64Array::from(&[Some(378000000), None]).to(data_type.clone())), + year: None, + month: None, + day: None, + weekday: None, + iso_week: None, + hour: Some(UInt32Array::from(&[Some(0), None])), + minute: Some(UInt32Array::from(&[Some(6), None])), + second: Some(UInt32Array::from(&[Some(18), None])), + nanosecond: Some(UInt32Array::from(&[Some(0), None])), + }, + DataType::Time64(TimeUnit::Nanosecond) => TestData { + input: Box::new( + Int64Array::from(&[Some(378000000100), None]).to(data_type.clone()), + ), + year: None, + month: None, + day: None, + weekday: None, + iso_week: None, + hour: Some(UInt32Array::from(&[Some(0), None])), + minute: Some(UInt32Array::from(&[Some(6), None])), + second: Some(UInt32Array::from(&[Some(18), None])), + nanosecond: Some(UInt32Array::from(&[Some(100), None])), + }, + DataType::Timestamp(TimeUnit::Microsecond, None) => TestData { + // 68216970000000 (Epoch Microsecond) has 29th Feb (leap year) + input: Box::new( + Int64Array::from(&[Some(1612025847000000), None, Some(68216970000000)]) + .to(data_type.clone()), + ), + year: Some(Int32Array::from(&[Some(2021), None, Some(1972)])), + month: Some(UInt32Array::from(&[Some(1), None, Some(2)])), + day: Some(UInt32Array::from(&[Some(30), None, Some(29)])), + weekday: Some(UInt32Array::from(&[Some(6), None, Some(2)])), + iso_week: Some(UInt32Array::from(&[Some(4), None, Some(9)])), + hour: Some(UInt32Array::from(&[Some(16), None, Some(13)])), + minute: Some(UInt32Array::from(&[Some(57), None, Some(9)])), + second: Some(UInt32Array::from(&[Some(27), None, Some(30)])), + nanosecond: Some(UInt32Array::from(&[Some(0), None, Some(0)])), + }, + _ => unreachable!(), + } + } + + fn available_time_like_types() -> Vec { + vec![ + DataType::Date32, + DataType::Date64, + DataType::Time32(TimeUnit::Second), + DataType::Time64(TimeUnit::Microsecond), + DataType::Time64(TimeUnit::Nanosecond), + 
DataType::Timestamp(TimeUnit::Microsecond, None), + ] + } - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(10), None]); - assert_eq!(result, expected); + fn available_date_like_types() -> Vec { + vec![ + DataType::Date32, + DataType::Date64, + DataType::Timestamp(TimeUnit::Microsecond, None), + ] + } } -#[test] -fn time64_micro_hour() { - let array = - Int64Array::from(&[Some(37800000000), None]).to(DataType::Time64(TimeUnit::Microsecond)); +macro_rules! temporal_tz_test { + ($func:ident, $extract:ident) => { + #[cfg(feature = "chrono-tz")] + #[test] + fn $func() { + let test_data = test_data_tz(); + + for data in test_data { + let result = $extract(&*data.input).unwrap(); - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(10), None]); - assert_eq!(result, expected); + assert_eq!(result, data.$extract.unwrap()); + } + } + }; } -#[test] -fn naive_timestamp_micro_hour() { - let array = Int64Array::from(&[Some(37800000000), None]) - .to(DataType::Timestamp(TimeUnit::Microsecond, None)); +temporal_tz_test!(temporal_tz_hour, hour); +temporal_tz_test!(temporal_tz_minute, minute); +temporal_tz_test!(temporal_tz_second, second); +temporal_tz_test!(temporal_tz_nanosecond, nanosecond); +temporal_tz_test!(temporal_tz_year, year); +temporal_tz_test!(temporal_tz_month, month); +temporal_tz_test!(temporal_tz_day, day); +temporal_tz_test!(temporal_tz_weekday, weekday); +temporal_tz_test!(temporal_tz_iso_week, iso_week); - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(10), None]); - assert_eq!(result, expected); +fn test_data_tz() -> Vec { + vec![ + TestData { + input: Box::new( + // Mon May 24 2021 17:25:30 GMT+0000 + Int64Array::from(&[Some(1621877130000000), None]).to(DataType::Timestamp( + TimeUnit::Microsecond, + Some("GMT".to_string()), + )), + ), + year: Some(Int32Array::from(&[Some(2021), None])), + month: Some(UInt32Array::from(&[Some(5), None])), + day: 
Some(UInt32Array::from(&[Some(24), None])), + weekday: Some(UInt32Array::from(&[Some(1), None])), + iso_week: Some(UInt32Array::from(&[Some(21), None])), + hour: Some(UInt32Array::from(&[Some(17), None])), + minute: Some(UInt32Array::from(&[Some(25), None])), + second: Some(UInt32Array::from(&[Some(30), None])), + nanosecond: Some(UInt32Array::from(&[Some(0), None])), + }, + TestData { + input: Box::new(Int64Array::from(&[Some(1621877130000000), None]).to( + DataType::Timestamp(TimeUnit::Microsecond, Some("+01:00".to_string())), + )), + year: Some(Int32Array::from(&[Some(2021), None])), + month: Some(UInt32Array::from(&[Some(5), None])), + day: Some(UInt32Array::from(&[Some(24), None])), + weekday: Some(UInt32Array::from(&[Some(1), None])), + iso_week: Some(UInt32Array::from(&[Some(21), None])), + hour: Some(UInt32Array::from(&[Some(18), None])), + minute: Some(UInt32Array::from(&[Some(25), None])), + second: Some(UInt32Array::from(&[Some(30), None])), + nanosecond: Some(UInt32Array::from(&[Some(0), None])), + }, + TestData { + input: Box::new(Int64Array::from(&[Some(1621877130000000), None]).to( + DataType::Timestamp(TimeUnit::Microsecond, Some("Europe/Lisbon".to_string())), + )), + year: Some(Int32Array::from(&[Some(2021), None])), + month: Some(UInt32Array::from(&[Some(5), None])), + day: Some(UInt32Array::from(&[Some(24), None])), + weekday: Some(UInt32Array::from(&[Some(1), None])), + iso_week: Some(UInt32Array::from(&[Some(21), None])), + hour: Some(UInt32Array::from(&[Some(18), None])), + minute: Some(UInt32Array::from(&[Some(25), None])), + second: Some(UInt32Array::from(&[Some(30), None])), + nanosecond: Some(UInt32Array::from(&[Some(0), None])), + }, + TestData { + input: Box::new( + // Sun Mar 29 2020 00:00:00 GMT+0000 (Western European Standard Time) + Int64Array::from(&[Some(1585440000), None]).to(DataType::Timestamp( + TimeUnit::Second, + Some("Europe/Lisbon".to_string()), + )), + ), + year: Some(Int32Array::from(&[Some(2020), None])), + month: 
Some(UInt32Array::from(&[Some(3), None])), + day: Some(UInt32Array::from(&[Some(29), None])), + weekday: Some(UInt32Array::from(&[Some(7), None])), + iso_week: Some(UInt32Array::from(&[Some(13), None])), + hour: Some(UInt32Array::from(&[Some(0), None])), + minute: Some(UInt32Array::from(&[Some(0), None])), + second: Some(UInt32Array::from(&[Some(0), None])), + nanosecond: Some(UInt32Array::from(&[Some(0), None])), + }, + TestData { + input: Box::new( + // Sun Mar 29 2020 02:00:00 GMT+0100 (Western European Summer Time) + Int64Array::from(&[Some(1585443600), None]).to(DataType::Timestamp( + TimeUnit::Second, + Some("Europe/Lisbon".to_string()), + )), + ), + year: Some(Int32Array::from(&[Some(2020), None])), + month: Some(UInt32Array::from(&[Some(3), None])), + day: Some(UInt32Array::from(&[Some(29), None])), + weekday: Some(UInt32Array::from(&[Some(7), None])), + iso_week: Some(UInt32Array::from(&[Some(13), None])), + hour: Some(UInt32Array::from(&[Some(2), None])), + minute: Some(UInt32Array::from(&[Some(0), None])), + second: Some(UInt32Array::from(&[Some(0), None])), + nanosecond: Some(UInt32Array::from(&[Some(0), None])), + }, + ] } #[test] -fn date64_year() { - let array = Int64Array::from(&[Some(1514764800000), None]).to(DataType::Date64); - - let result = year(&array).unwrap(); - let expected = Int32Array::from(&[Some(2018), None]); - assert_eq!(result, expected); +fn consistency_hour() { + consistency_check(can_hour, hour); } #[test] -fn naive_timestamp_date32_year() { - let array = Int32Array::from(&[Some(15147), None]).to(DataType::Date32); - - let result = year(&array).unwrap(); - let expected = Int32Array::from(&[Some(2011), None]); - assert_eq!(result, expected); +fn consistency_minute() { + consistency_check(can_minute, minute); } #[test] -fn naive_timestamp_micro_year() { - let array = Int64Array::from(&[Some(1612025847000000), None]) - .to(DataType::Timestamp(TimeUnit::Microsecond, None)); - - let result = year(&array).unwrap(); - let expected = 
Int32Array::from(&[Some(2021), None]); - assert_eq!(result, expected); +fn consistency_second() { + consistency_check(can_second, second); } #[test] -fn timestamp_micro_hour() { - let array = Int64Array::from(&[Some(1621877130000000), None]).to(DataType::Timestamp( - TimeUnit::Microsecond, - Some("+01:00".to_string()), - )); - - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(18), None]); - assert_eq!(result, expected); +fn consistency_nanosecond() { + consistency_check(can_nanosecond, nanosecond); } -#[cfg(feature = "chrono-tz")] #[test] -fn timestamp_micro_hour_tz() { - let timestamp = 1621877130000000; // Mon May 24 2021 17:25:30 GMT+0000 - let array = Int64Array::from(&[Some(timestamp), None]).to(DataType::Timestamp( - TimeUnit::Microsecond, - Some("GMT".to_string()), - )); - - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(17), None]); - assert_eq!(result, expected); - - // (Western European Summer Time in Lisbon) => +1 hour - let array = Int64Array::from(&[Some(timestamp), None]).to(DataType::Timestamp( - TimeUnit::Microsecond, - Some("Europe/Lisbon".to_string()), - )); - - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(18), None]); - assert_eq!(result, expected); +fn consistency_year() { + consistency_check(can_year, year); } #[test] -fn consistency_hour() { - use arrow2::array::new_null_array; - use arrow2::datatypes::DataType::*; - use arrow2::datatypes::TimeUnit; +fn consistency_month() { + consistency_check(can_month, month); +} - let datatypes = vec![ - Null, - Boolean, - UInt8, - UInt16, - UInt32, - UInt64, - Int8, - Int16, - Int32, - Int64, - Float32, - Float64, - Timestamp(TimeUnit::Second, None), - Timestamp(TimeUnit::Millisecond, None), - Timestamp(TimeUnit::Microsecond, None), - Timestamp(TimeUnit::Nanosecond, None), - Timestamp(TimeUnit::Nanosecond, Some("+00:00".to_string())), - Time64(TimeUnit::Microsecond), - Time64(TimeUnit::Nanosecond), - Date32, - 
Time32(TimeUnit::Second), - Time32(TimeUnit::Millisecond), - Date64, - Utf8, - LargeUtf8, - Binary, - LargeBinary, - Duration(TimeUnit::Second), - Duration(TimeUnit::Millisecond), - Duration(TimeUnit::Microsecond), - Duration(TimeUnit::Nanosecond), - ]; +#[test] +fn consistency_day() { + consistency_check(can_day, day); +} - datatypes.into_iter().for_each(|d1| { - let array = new_null_array(d1.clone(), 10); - if can_hour(&d1) { - assert!(hour(array.as_ref()).is_ok()); - } else { - assert!(hour(array.as_ref()).is_err()); - } - }); +#[test] +fn consistency_weekday() { + consistency_check(can_weekday, weekday); } #[test] -fn consistency_year() { - use arrow2::array::new_null_array; +fn consistency_iso_week() { + consistency_check(can_iso_week, iso_week); +} + +fn consistency_check( + can_extract: fn(&DataType) -> bool, + extract: fn(&dyn Array) -> arrow2::error::Result>, +) { use arrow2::datatypes::DataType::*; - use arrow2::datatypes::TimeUnit; let datatypes = vec![ Null, @@ -206,10 +365,10 @@ fn consistency_year() { datatypes.into_iter().for_each(|d1| { let array = new_null_array(d1.clone(), 10); - if can_year(&d1) { - assert!(year(array.as_ref()).is_ok()); + if can_extract(&d1) { + assert!(extract(array.as_ref()).is_ok()); } else { - assert!(year(array.as_ref()).is_err()); + assert!(extract(array.as_ref()).is_err()); } }); }