diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index 8ee82d872651..b6a9a1c7e9db 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -23,8 +23,10 @@ use arrow::array::{Array, ArrayRef, Float64Array}; use arrow::compute::kernels::cast_utils::IntervalUnit; use arrow::compute::{binary, cast, date_part, DatePart}; use arrow::datatypes::DataType::{ - Date32, Date64, Float64, Time32, Time64, Timestamp, Utf8, Utf8View, + Date32, Date64, Duration, Float64, Interval, Time32, Time64, Timestamp, Utf8, + Utf8View, }; +use arrow::datatypes::IntervalUnit::{DayTime, MonthDayNano, YearMonth}; use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second}; use arrow::datatypes::{DataType, TimeUnit}; @@ -109,6 +111,20 @@ impl DatePartFunc { Exact(vec![Utf8View, Time64(Microsecond)]), Exact(vec![Utf8, Time64(Nanosecond)]), Exact(vec![Utf8View, Time64(Nanosecond)]), + Exact(vec![Utf8, Interval(YearMonth)]), + Exact(vec![Utf8View, Interval(YearMonth)]), + Exact(vec![Utf8, Interval(DayTime)]), + Exact(vec![Utf8View, Interval(DayTime)]), + Exact(vec![Utf8, Interval(MonthDayNano)]), + Exact(vec![Utf8View, Interval(MonthDayNano)]), + Exact(vec![Utf8, Duration(Second)]), + Exact(vec![Utf8View, Duration(Second)]), + Exact(vec![Utf8, Duration(Millisecond)]), + Exact(vec![Utf8View, Duration(Millisecond)]), + Exact(vec![Utf8, Duration(Microsecond)]), + Exact(vec![Utf8View, Duration(Microsecond)]), + Exact(vec![Utf8, Duration(Nanosecond)]), + Exact(vec![Utf8View, Duration(Nanosecond)]), ], Volatility::Immutable, ), @@ -224,10 +240,28 @@ fn seconds(array: &dyn Array, unit: TimeUnit) -> Result { let subsecs = date_part(array, DatePart::Nanosecond)?; let subsecs = as_int32_array(subsecs.as_ref())?; - let r: Float64Array = binary(secs, subsecs, |secs, subsecs| { - (secs as f64 + (subsecs as f64 / 1_000_000_000_f64)) * sf - })?; - Ok(Arc::new(r)) + // Special case where there are no nulls. + if subsecs.null_count() == 0 { + let r: Float64Array = binary(secs, subsecs, |secs, subsecs| { + (secs as f64 + ((subsecs % 1_000_000_000) as f64 / 1_000_000_000_f64)) * sf + })?; + Ok(Arc::new(r)) + } else { + // Nulls in secs are preserved, nulls in subsecs are treated as zero to account for the case + // where the number of nanoseconds overflows. + let r: Float64Array = secs + .iter() + .zip(subsecs) + .map(|(secs, subsecs)| { + secs.map(|secs| { + let subsecs = subsecs.unwrap_or(0); + (secs as f64 + ((subsecs % 1_000_000_000) as f64 / 1_000_000_000_f64)) + * sf + }) + }) + .collect(); + Ok(Arc::new(r)) + } } fn epoch(array: &dyn Array) -> Result { @@ -256,7 +290,8 @@ fn epoch(array: &dyn Array) -> Result { Time64(Nanosecond) => { as_time64_nanosecond_array(array)?.unary(|x| x as f64 / 1_000_000_000_f64) } - d => return exec_err!("Can not convert {d:?} to epoch"), + Interval(_) | Duration(_) => return seconds(array, Second), + d => return exec_err!("Cannot convert {d:?} to epoch"), }; Ok(Arc::new(f)) } diff --git a/datafusion/sqllogictest/test_files/expr.slt b/datafusion/sqllogictest/test_files/expr.slt index e8d8329d34e1..182afff7a693 100644 --- a/datafusion/sqllogictest/test_files/expr.slt +++ b/datafusion/sqllogictest/test_files/expr.slt @@ -1472,6 +1472,189 @@ SELECT extract(epoch from arrow_cast('1969-12-31', 'Date64')) ---- -86400 +# test_extract_interval + +query R +SELECT extract(year from arrow_cast('10 years', 'Interval(YearMonth)')) +---- +10 + +query R +SELECT extract(month from arrow_cast('10 years', 'Interval(YearMonth)')) +---- +0 + +query R +SELECT extract(year from arrow_cast('10 months', 'Interval(YearMonth)')) +---- +0 + +query R +SELECT extract(month from arrow_cast('10 months', 'Interval(YearMonth)')) +---- +10 + +query R +SELECT extract(year from arrow_cast('20 months', 'Interval(YearMonth)')) +---- +1 + +query R +SELECT extract(month from arrow_cast('20 months', 'Interval(YearMonth)')) +---- +8 + +query error DataFusion error: Arrow error: Compute error: Year does not support: Interval\(DayTime\) +SELECT extract(year from arrow_cast('10 days', 'Interval(DayTime)')) + +query error DataFusion error: Arrow error: Compute error: Month does not support: Interval\(DayTime\) +SELECT extract(month from arrow_cast('10 days', 'Interval(DayTime)')) + +query R +SELECT extract(day from arrow_cast('10 days', 'Interval(DayTime)')) +---- +10 + +query R +SELECT extract(day from arrow_cast('14400 minutes', 'Interval(DayTime)')) +---- +0 + +query R +SELECT extract(minute from arrow_cast('14400 minutes', 'Interval(DayTime)')) +---- +14400 + +query R +SELECT extract(second from arrow_cast('5.1 seconds', 'Interval(DayTime)')) +---- +5 + +query R +SELECT extract(second from arrow_cast('14400 minutes', 'Interval(DayTime)')) +---- +864000 + +query R +SELECT extract(second from arrow_cast('2 months', 'Interval(MonthDayNano)')) +---- +0 + +query R +SELECT extract(second from arrow_cast('2 days', 'Interval(MonthDayNano)')) +---- +0 + +query R +SELECT extract(second from arrow_cast('2 seconds', 'Interval(MonthDayNano)')) +---- +2 + +query R +SELECT extract(seconds from arrow_cast('2 seconds', 'Interval(MonthDayNano)')) +---- +2 + +query R +SELECT extract(epoch from arrow_cast('2 seconds', 'Interval(MonthDayNano)')) +---- +2 + +query R +SELECT extract(milliseconds from arrow_cast('2 seconds', 'Interval(MonthDayNano)')) +---- +2000 + +query R +SELECT extract(second from arrow_cast('2030 milliseconds', 'Interval(MonthDayNano)')) +---- +2.03 + +query R +SELECT extract(second from arrow_cast(NULL, 'Interval(MonthDayNano)')) +---- +NULL + +statement ok +create table t (id int, i interval) as values + (0, interval '5 months 1 day 10 nanoseconds'), + (1, interval '1 year 3 months'), + (2, interval '3 days 2 milliseconds'), + (3, interval '2 seconds'), + (4, interval '8 months'), + (5, NULL); + +query IRR rowsort +select + id, + extract(second from i), + extract(month from i) +from t +order by id; +---- +0 0.00000001 5 +1 0 15 +2 0.002 0 +3 2 0 +4 0 8 +5 NULL NULL + +statement ok +drop table t; + +# test_extract_duration + +query R +SELECT extract(second from arrow_cast(2, 'Duration(Second)')) +---- +2 + +query R +SELECT extract(seconds from arrow_cast(2, 'Duration(Second)')) +---- +2 + +query R +SELECT extract(epoch from arrow_cast(2, 'Duration(Second)')) +---- +2 + +query R +SELECT extract(millisecond from arrow_cast(2, 'Duration(Second)')) +---- +2000 + +query R +SELECT extract(second from arrow_cast(2, 'Duration(Millisecond)')) +---- +0.002 + +query R +SELECT extract(second from arrow_cast(2002, 'Duration(Millisecond)')) +---- +2.002 + +query R +SELECT extract(millisecond from arrow_cast(2002, 'Duration(Millisecond)')) +---- +2002 + +query R +SELECT extract(day from arrow_cast(864000, 'Duration(Second)')) +---- +10 + +query error DataFusion error: Arrow error: Compute error: Month does not support: Duration\(Second\) +SELECT extract(month from arrow_cast(864000, 'Duration(Second)')) + +query error DataFusion error: Arrow error: Compute error: Year does not support: Duration\(Second\) +SELECT extract(year from arrow_cast(864000, 'Duration(Second)')) + +query R +SELECT extract(day from arrow_cast(NULL, 'Duration(Second)')) +---- +NULL + # test_extract_date_part_func query B