From 400e4c5f41a18c61e6c8b350a6169c6dc366f751 Mon Sep 17 00:00:00 2001 From: sriram Date: Mon, 10 Nov 2025 16:13:03 +0530 Subject: [PATCH 1/9] resolve conflicts with main --- .../functions/src/datetime/date_part.rs | 146 +++++++++++++--- datafusion/functions/src/datetime/mod.rs | 56 +++++++ .../sqllogictest/test_files/extract_tz.slt | 158 ++++++++++++++++++ .../source/user-guide/sql/scalar_functions.md | 6 + 4 files changed, 345 insertions(+), 21 deletions(-) create mode 100644 datafusion/sqllogictest/test_files/extract_tz.slt diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index 375200d07280b..f724b9becc116 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -19,16 +19,24 @@ use std::any::Any; use std::str::FromStr; use std::sync::Arc; -use arrow::array::{Array, ArrayRef, Float64Array, Int32Array}; +use arrow::array::timezone::Tz; +use arrow::array::{Array, ArrayRef, Float64Array, Int32Array, PrimitiveBuilder}; use arrow::compute::kernels::cast_utils::IntervalUnit; use arrow::compute::{DatePart, binary, date_part}; use arrow::datatypes::DataType::{ Date32, Date64, Duration, Interval, Time32, Time64, Timestamp, }; use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second}; -use arrow::datatypes::{DataType, Field, FieldRef, TimeUnit}; + +use arrow::datatypes::{ + ArrowTimestampType, DataType, Field, FieldRef, TimeUnit, TimestampMicrosecondType, + TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, +}; + +use datafusion_common::cast::as_primitive_array; use datafusion_common::types::{NativeType, logical_date}; +use super::adjust_to_local_time; use datafusion_common::{ Result, ScalarValue, cast::{ @@ -56,22 +64,23 @@ use datafusion_macros::user_doc; argument( name = "part", description = r#"Part of the date to return. The following date parts are supported: - - - year - - quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in) - - month - - week (week of the year) - - day (day of the month) - - hour - - minute - - second - - millisecond - - microsecond - - nanosecond - - dow (day of the week where Sunday is 0) - - doy (day of the year) - - epoch (seconds since Unix epoch) - - isodow (day of the week where Monday is 0) + + + - year + - quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in) + - month + - week (week of the year) + - day (day of the month) + - hour + - minute + - second + - millisecond + - microsecond + - nanosecond + - dow (day of the week where Sunday is 0) + - doy (day of the year) + - epoch (seconds since Unix epoch) + - isodow (day of the week where Monday is 0) "# ), argument( @@ -124,7 +133,7 @@ impl DatePartFunc { ], Volatility::Immutable, ), - aliases: vec![String::from("datepart")], + aliases: vec![String::from("datepart"), String::from("extract")], } } } @@ -173,6 +182,7 @@ impl ScalarUDFImpl for DatePartFunc { &self, args: datafusion_expr::ScalarFunctionArgs, ) -> Result { + let config = &args.config_options; let args = args.args; let [part, array] = take_function_args(self.name(), args)?; @@ -182,8 +192,8 @@ impl ScalarUDFImpl for DatePartFunc { v } else { return exec_err!( - "First argument of `DATE_PART` must be non-null scalar Utf8" - ); +"First argument of `DATE_PART` must be non-null scalar Utf8" +); }; let is_scalar = matches!(array, ColumnarValue::Scalar(_)); @@ -193,7 +203,72 @@ impl ScalarUDFImpl for DatePartFunc { ColumnarValue::Scalar(scalar) => scalar.to_array()?, }; + let (is_timezone_aware, tz_str_opt) = match array.data_type() { + Timestamp(_, Some(tz_str)) => (true, Some(Arc::clone(tz_str))), + _ => (false, None), + }; + let part_trim = part_normalization(&part); + let is_epoch = is_epoch(part_trim); + + // Epoch is timezone-independent - it always returns seconds since 1970-01-01 UTC + let array = if is_epoch { + array + } else if is_timezone_aware { + // For timezone-aware timestamps, extract in their own timezone + match tz_str_opt.as_ref() { + Some(tz_str) => { + let tz = interpret_session_timezone(tz_str)?; + match array.data_type() { + Timestamp(time_unit, _) => match time_unit { + Nanosecond => adjust_timestamp_array::< + TimestampNanosecondType, + >(&array, tz)?, + Microsecond => adjust_timestamp_array::< + TimestampMicrosecondType, + >(&array, tz)?, + Millisecond => adjust_timestamp_array::< + TimestampMillisecondType, + >(&array, tz)?, + Second => { + adjust_timestamp_array::(&array, tz)? + } + }, + _ => array, + } + } + None => array, + } + } else if let Timestamp(time_unit, None) = array.data_type() { + // For naive timestamps, interpret in session timezone if available + match config.execution.time_zone.as_ref() { + Some(tz_str) => { + let tz = interpret_session_timezone(tz_str)?; + + match time_unit { + Nanosecond => { + adjust_timestamp_array::(&array, tz)? + } + Microsecond => { + adjust_timestamp_array::( + &array, tz, + )? + } + Millisecond => { + adjust_timestamp_array::( + &array, tz, + )? + } + Second => { + adjust_timestamp_array::(&array, tz)? + } + } + } + None => array, + } + } else { + array + }; // using IntervalUnit here means we hand off all the work of supporting plurals (like "seconds") // and synonyms ( like "ms,msec,msecond,millisecond") to Arrow @@ -240,12 +315,32 @@ impl ScalarUDFImpl for DatePartFunc { } } +fn adjust_timestamp_array( + array: &ArrayRef, + tz: Tz, +) -> Result { + let mut builder = PrimitiveBuilder::::new(); + let primitive_array = as_primitive_array::(array)?; + for ts_opt in primitive_array.iter() { + match ts_opt { + None => builder.append_null(), + Some(ts) => { + let adjusted_ts = adjust_to_local_time::(ts, tz)?; + builder.append_value(adjusted_ts); + } + } + } + Ok(Arc::new(builder.finish())) +} + fn is_epoch(part: &str) -> bool { let part = part_normalization(part); matches!(part.to_lowercase().as_str(), "epoch") } // Try to remove quote if exist, if the quote is invalid, return original string and let the downstream function handle the error +// Try to remove quote if exist, if the quote is invalid, return original string +// and let the downstream function handle the error. fn part_normalization(part: &str) -> &str { part.strip_prefix(|c| c == '\'' || c == '\"') .and_then(|s| s.strip_suffix(|c| c == '\'' || c == '\"')) @@ -255,6 +350,13 @@ fn part_normalization(part: &str) -> &str { /// Invoke [`date_part`] on an `array` (e.g. Timestamp) and convert the /// result to a total number of seconds, milliseconds, microseconds or /// nanoseconds +fn interpret_session_timezone(tz_str: &str) -> Result { + match tz_str.parse::() { + Ok(tz) => Ok(tz), + Err(err) => exec_err!("Invalid timezone '{tz_str}': {err}"), + } +} + fn seconds_as_i32(array: &dyn Array, unit: TimeUnit) -> Result { // Nanosecond is neither supported in Postgres nor DuckDB, to avoid dealing // with overflow and precision issue we don't support nanosecond @@ -310,6 +412,8 @@ fn seconds_as_i32(array: &dyn Array, unit: TimeUnit) -> Result { /// nanoseconds /// /// Given epoch return f64, this is a duplicated function to optimize for f64 type +// Converts seconds to f64 with the specified time unit. +// Used for Interval and Duration types that need floating-point precision. fn seconds(array: &dyn Array, unit: TimeUnit) -> Result { let sf = match unit { Second => 1_f64, diff --git a/datafusion/functions/src/datetime/mod.rs b/datafusion/functions/src/datetime/mod.rs index 39b9453295df6..acbac50ca3da8 100644 --- a/datafusion/functions/src/datetime/mod.rs +++ b/datafusion/functions/src/datetime/mod.rs @@ -19,6 +19,13 @@ use std::sync::Arc; +use arrow::array::timezone::Tz; +use arrow::datatypes::ArrowTimestampType; +use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second}; +use chrono::{DateTime, MappedLocalTime, Offset, TimeDelta, TimeZone, Utc}; +use datafusion_common::{exec_err, internal_datafusion_err, Result}; +use std::ops::Add; + use datafusion_expr::ScalarUDF; pub mod common; @@ -39,6 +46,55 @@ pub mod to_time; pub mod to_timestamp; pub mod to_unixtime; +// Adjusts a timestamp to local time by applying the timezone offset. +pub fn adjust_to_local_time(ts: i64, tz: Tz) -> Result { + fn convert_timestamp(ts: i64, converter: F) -> Result> + where + F: Fn(i64) -> MappedLocalTime>, + { + match converter(ts) { + MappedLocalTime::Ambiguous(earliest, latest) => exec_err!( + "Ambiguous timestamp. Do you mean {:?} or {:?}", + earliest, + latest + ), + MappedLocalTime::None => exec_err!( + "The local time does not exist because there is a gap in the local time." + ), + MappedLocalTime::Single(date_time) => Ok(date_time), + } + } + + let date_time = match T::UNIT { + Nanosecond => Utc.timestamp_nanos(ts), + Microsecond => convert_timestamp(ts, |ts| Utc.timestamp_micros(ts))?, + Millisecond => convert_timestamp(ts, |ts| Utc.timestamp_millis_opt(ts))?, + Second => convert_timestamp(ts, |ts| Utc.timestamp_opt(ts, 0))?, + }; + + let offset_seconds: i64 = tz + .offset_from_utc_datetime(&date_time.naive_utc()) + .fix() + .local_minus_utc() as i64; + + let adjusted_date_time = date_time.add( + TimeDelta::try_seconds(offset_seconds) + .ok_or_else(|| internal_datafusion_err!("Offset seconds should be less than i64::MAX / 1_000 or greater than -i64::MAX / 1_000"))?, + ); + + // convert back to i64 + match T::UNIT { + Nanosecond => adjusted_date_time.timestamp_nanos_opt().ok_or_else(|| { + internal_datafusion_err!( + "Failed to convert DateTime to timestamp in nanosecond. This error may occur if the date is out of range. The supported date ranges are between 1677-09-21T00:12:43.145224192 and 2262-04-11T23:47:16.854775807" + ) + }), + Microsecond => Ok(adjusted_date_time.timestamp_micros()), + Millisecond => Ok(adjusted_date_time.timestamp_millis()), + Second => Ok(adjusted_date_time.timestamp()), + } +} + // create UDFs make_udf_function!(current_date::CurrentDateFunc, current_date); make_udf_function!(current_time::CurrentTimeFunc, current_time); diff --git a/datafusion/sqllogictest/test_files/extract_tz.slt b/datafusion/sqllogictest/test_files/extract_tz.slt new file mode 100644 index 0000000000000..41892fef70772 --- /dev/null +++ b/datafusion/sqllogictest/test_files/extract_tz.slt @@ -0,0 +1,158 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Tests for timezone-aware extract SQL statement support. +# Test with different timezone +statement ok +SET datafusion.execution.time_zone = '-03:00'; + +query I +SELECT EXTRACT(HOUR FROM TIMESTAMP '2025-11-18 10:00:00'); +---- +7 + +query II +SELECT EXTRACT(MINUTE FROM TIMESTAMP '2023-10-30 10:45:30'), + EXTRACT(SECOND FROM TIMESTAMP '2023-10-30 10:45:30'); +---- +45 30 + +query III +SELECT EXTRACT(YEAR FROM DATE '2023-10-30'), + EXTRACT(MONTH FROM DATE '2023-10-30'), + EXTRACT(DAY FROM DATE '2023-10-30'); +---- +2023 10 30 + +query I +SELECT EXTRACT(HOUR FROM CAST(NULL AS TIMESTAMP)); +---- +NULL + +statement ok +SET datafusion.execution.time_zone = '+04:00'; + +query I +SELECT EXTRACT(HOUR FROM TIMESTAMP '2023-10-30 02:00:00'); +---- +6 + +query III +SELECT EXTRACT(HOUR FROM TIMESTAMP '2023-10-30 18:20:59'), + EXTRACT(MINUTE FROM TIMESTAMP '2023-10-30 18:20:59'), + EXTRACT(SECOND FROM TIMESTAMP '2023-10-30 18:20:59'); +---- +22 20 59 + +query II +SELECT EXTRACT(DOW FROM DATE '2025-11-01'), + EXTRACT(DOY FROM DATE '2026-12-31'); +---- +6 365 + +statement ok +SET datafusion.execution.time_zone = '+00:00'; + +query I +SELECT EXTRACT(HOUR FROM TIMESTAMP '2025-10-30 10:45:30+02:00'); +---- +8 + +query I +SELECT EXTRACT(HOUR FROM TIMESTAMP '2025-10-30 10:45:30-05:00'); +---- +15 + +query II +SELECT EXTRACT(YEAR FROM TIMESTAMP '2026-11-30 10:45:30Z'), + EXTRACT(MONTH FROM TIMESTAMP '2023-10-30 10:45:30Z'); +---- +2026 10 + +query III +SELECT EXTRACT(HOUR FROM TIMESTAMP '2023-10-30 18:20:59+04:00'), + EXTRACT(MINUTE FROM TIMESTAMP '2023-10-30 18:20:59+04:00'), + EXTRACT(SECOND FROM TIMESTAMP '2023-10-30 18:20:59+04:00'); +---- +14 20 59 + +query II +SELECT EXTRACT(HOUR FROM TIMESTAMP '2025-10-30 10:25:30+02:30'), + EXTRACT(MINUTE FROM TIMESTAMP '2023-10-30 18:20:59-04:30'); +---- +7 50 + +query III +SELECT EXTRACT(HOUR FROM TIMESTAMP '2023-10-30 18:20:59-08:00'), + EXTRACT(DAY FROM TIMESTAMP '2023-10-30 18:20:59-07:00'), + EXTRACT(DAY FROM TIMESTAMP '2023-10-30 07:20:59+12:00'); +---- +2 31 29 + +query IIIIII +SELECT EXTRACT(YEAR FROM TIMESTAMP '2023-12-31 18:20:59-08:45'), + EXTRACT(MONTH FROM TIMESTAMP '2023-12-31 18:20:59-08:45'), + EXTRACT(DAY FROM TIMESTAMP '2023-12-31 18:20:59-08:45'), + EXTRACT(HOUR FROM TIMESTAMP '2023-12-31 18:20:59-08:45'), + EXTRACT(MINUTE FROM TIMESTAMP '2023-12-31 18:20:59-08:45'), + EXTRACT(SECOND FROM TIMESTAMP '2023-12-31 18:20:59-08:45'); +---- +2024 1 1 3 5 59 + +query IIIIII +SELECT EXTRACT(YEAR FROM TIMESTAMP '2024-01-01 03:05:59+08:45'), + EXTRACT(MONTH FROM TIMESTAMP '2024-01-01 03:05:59+08:45'), + EXTRACT(DAY FROM TIMESTAMP '2024-01-01 03:05:59+08:45'), + EXTRACT(HOUR FROM TIMESTAMP '2024-01-01 03:05:59+08:45'), + EXTRACT(MINUTE FROM TIMESTAMP '2024-01-01 03:05:59+08:45'), + EXTRACT(SECOND FROM TIMESTAMP '2024-01-01 03:05:59+08:45'); +---- +2023 12 31 18 20 59 + +statement ok +SET datafusion.execution.time_zone = 'Asia/Kolkata'; + +query IIII +SELECT +EXTRACT(HOUR FROM TIMESTAMP '2025-11-22 15:30:45'), +EXTRACT(MINUTE FROM TIMESTAMP '2025-11-22 15:30:45'), +EXTRACT(DOW FROM TIMESTAMP '2025-11-22 00:00:00'), +EXTRACT(SECOND FROM TIMESTAMP '2024-01-01 03:05:59'); +---- +21 0 6 59 + +query I +SELECT EXTRACT(HOUR FROM TIMESTAMP '2025-01-15 10:00:00'); +---- +15 + +statement ok +SET datafusion.execution.time_zone = 'America/New_York'; + +query IIII +SELECT +EXTRACT(HOUR FROM TIMESTAMP '2025-11-22 15:30:45'), +EXTRACT(MINUTE FROM TIMESTAMP '2025-11-22 15:30:45'), +EXTRACT(DOW FROM TIMESTAMP '2025-11-22 00:00:00'), +EXTRACT(SECOND FROM TIMESTAMP '2024-01-01 03:05:59'); +---- +10 30 5 59 + +query I +SELECT EXTRACT(HOUR FROM TIMESTAMP '2025-01-15 10:00:00'); +---- +5 \ No newline at end of file diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 4079802d9e630..744ef26c6dd7b 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -2387,6 +2387,7 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo - [date_trunc](#date_trunc) - [datepart](#datepart) - [datetrunc](#datetrunc) +- [extract](#extract) - [from_unixtime](#from_unixtime) - [make_date](#make_date) - [make_time](#make_time) @@ -2545,6 +2546,7 @@ extract(field FROM source) #### Aliases - datepart +- extract ### `date_trunc` @@ -2593,6 +2595,10 @@ _Alias of [date_part](#date_part)._ _Alias of [date_trunc](#date_trunc)._ +### `extract` + +_Alias of [date_part](#date_part)._ + ### `from_unixtime` Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`) return the corresponding timestamp. From a3d9e53ab8e10f7dfb5ec9bbe4092229042bbb8f Mon Sep 17 00:00:00 2001 From: sriram Date: Sat, 29 Nov 2025 16:55:45 +0530 Subject: [PATCH 2/9] make extract timezone aware --- .../functions/src/datetime/date_part.rs | 3 +- datafusion/functions/src/datetime/mod.rs | 3 +- .../functions/src/datetime/to_local_time.rs | 68 ++----------------- .../sqllogictest/test_files/extract_tz.slt | 2 +- 4 files changed, 12 insertions(+), 64 deletions(-) diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index f724b9becc116..9ca9bd266819b 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -20,7 +20,8 @@ use std::str::FromStr; use std::sync::Arc; use arrow::array::timezone::Tz; -use arrow::array::{Array, ArrayRef, Float64Array, Int32Array, PrimitiveBuilder}; +use arrow::array::{Array, ArrayRef, Float64Array, Int32Array, + PrimitiveBuilder}; use arrow::compute::kernels::cast_utils::IntervalUnit; use arrow::compute::{DatePart, binary, date_part}; use arrow::datatypes::DataType::{ diff --git a/datafusion/functions/src/datetime/mod.rs b/datafusion/functions/src/datetime/mod.rs index acbac50ca3da8..dfc0b815db55c 100644 --- a/datafusion/functions/src/datetime/mod.rs +++ b/datafusion/functions/src/datetime/mod.rs @@ -21,7 +21,8 @@ use std::sync::Arc; use arrow::array::timezone::Tz; use arrow::datatypes::ArrowTimestampType; -use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second}; +use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, + Second}; use chrono::{DateTime, MappedLocalTime, Offset, TimeDelta, TimeZone, Utc}; use datafusion_common::{exec_err, internal_datafusion_err, Result}; use std::ops::Add; diff --git a/datafusion/functions/src/datetime/to_local_time.rs b/datafusion/functions/src/datetime/to_local_time.rs index 86c949711d011..185b63cdb975f 100644 --- a/datafusion/functions/src/datetime/to_local_time.rs +++ b/datafusion/functions/src/datetime/to_local_time.rs @@ -16,7 +16,6 @@ // under the License. use std::any::Any; -use std::ops::Add; use std::sync::Arc; use arrow::array::timezone::Tz; @@ -27,13 +26,10 @@ use arrow::datatypes::{ ArrowTimestampType, DataType, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, }; -use chrono::{DateTime, MappedLocalTime, Offset, TimeDelta, TimeZone, Utc}; +use crate::datetime::adjust_to_local_time; use datafusion_common::cast::as_primitive_array; -use datafusion_common::{ - Result, ScalarValue, exec_err, internal_datafusion_err, internal_err, - utils::take_function_args, -}; +use datafusion_common::{internal_err, utils::take_function_args, Result, ScalarValue}; use datafusion_expr::{ Coercion, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignatureClass, Volatility, @@ -192,7 +188,7 @@ fn to_local_time(time_value: &ColumnarValue) -> Result { dt => { return internal_err!( "to_local_time function requires timestamp argument, got {dt}" - ); + ) } }; @@ -324,70 +320,20 @@ fn to_local_time(time_value: &ColumnarValue) -> Result { /// ``` /// /// See `test_adjust_to_local_time()` for example -fn adjust_to_local_time(ts: i64, tz: Tz) -> Result { - fn convert_timestamp(ts: i64, converter: F) -> Result> - where - F: Fn(i64) -> MappedLocalTime>, - { - match converter(ts) { - MappedLocalTime::Ambiguous(earliest, latest) => exec_err!( - "Ambiguous timestamp. Do you mean {:?} or {:?}", - earliest, - latest - ), - MappedLocalTime::None => exec_err!( - "The local time does not exist because there is a gap in the local time." - ), - MappedLocalTime::Single(date_time) => Ok(date_time), - } - } - - let date_time = match T::UNIT { - Nanosecond => Utc.timestamp_nanos(ts), - Microsecond => convert_timestamp(ts, |ts| Utc.timestamp_micros(ts))?, - Millisecond => convert_timestamp(ts, |ts| Utc.timestamp_millis_opt(ts))?, - Second => convert_timestamp(ts, |ts| Utc.timestamp_opt(ts, 0))?, - }; - - let offset_seconds: i64 = tz - .offset_from_utc_datetime(&date_time.naive_utc()) - .fix() - .local_minus_utc() as i64; - - let adjusted_date_time = date_time.add( - // This should not fail under normal circumstances as the - // maximum possible offset is 26 hours (93,600 seconds) - TimeDelta::try_seconds(offset_seconds) - .ok_or_else(|| internal_datafusion_err!("Offset seconds should be less than i64::MAX / 1_000 or greater than -i64::MAX / 1_000"))?, - ); - - // convert the naive datetime back to i64 - match T::UNIT { - Nanosecond => adjusted_date_time.timestamp_nanos_opt().ok_or_else(|| - internal_datafusion_err!( - "Failed to convert DateTime to timestamp in nanosecond. This error may occur if the date is out of range. The supported date ranges are between 1677-09-21T00:12:43.145224192 and 2262-04-11T23:47:16.854775807" - ) - ), - Microsecond => Ok(adjusted_date_time.timestamp_micros()), - Millisecond => Ok(adjusted_date_time.timestamp_millis()), - Second => Ok(adjusted_date_time.timestamp()), - } -} - #[cfg(test)] mod tests { use std::sync::Arc; - use arrow::array::{Array, TimestampNanosecondArray, types::TimestampNanosecondType}; + use super::ToLocalTimeFunc; + use crate::datetime::adjust_to_local_time; + use arrow::array::{types::TimestampNanosecondType, Array, TimestampNanosecondArray}; use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos; use arrow::datatypes::{DataType, Field, TimeUnit}; use chrono::NaiveDateTime; - use datafusion_common::ScalarValue; use datafusion_common::config::ConfigOptions; + use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl}; - use super::{ToLocalTimeFunc, adjust_to_local_time}; - #[test] fn test_adjust_to_local_time() { let timestamp_str = "2020-03-31T13:40:00"; diff --git a/datafusion/sqllogictest/test_files/extract_tz.slt b/datafusion/sqllogictest/test_files/extract_tz.slt index 41892fef70772..010129ecb8090 100644 --- a/datafusion/sqllogictest/test_files/extract_tz.slt +++ b/datafusion/sqllogictest/test_files/extract_tz.slt @@ -155,4 +155,4 @@ EXTRACT(SECOND FROM TIMESTAMP '2024-01-01 03:05:59'); query I SELECT EXTRACT(HOUR FROM TIMESTAMP '2025-01-15 10:00:00'); ---- -5 \ No newline at end of file +5 From b488c1abd9381c3e753aecade3823b5b4b356435 Mon Sep 17 00:00:00 2001 From: sriram Date: Sat, 29 Nov 2025 17:00:16 +0530 Subject: [PATCH 3/9] cargo fmt --- datafusion/functions/src/datetime/date_part.rs | 3 +-- datafusion/functions/src/datetime/mod.rs | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index 9ca9bd266819b..f724b9becc116 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -20,8 +20,7 @@ use std::str::FromStr; use std::sync::Arc; use arrow::array::timezone::Tz; -use arrow::array::{Array, ArrayRef, Float64Array, Int32Array, - PrimitiveBuilder}; +use arrow::array::{Array, ArrayRef, Float64Array, Int32Array, PrimitiveBuilder}; use arrow::compute::kernels::cast_utils::IntervalUnit; use arrow::compute::{DatePart, binary, date_part}; use arrow::datatypes::DataType::{ diff --git a/datafusion/functions/src/datetime/mod.rs b/datafusion/functions/src/datetime/mod.rs index dfc0b815db55c..acbac50ca3da8 100644 --- a/datafusion/functions/src/datetime/mod.rs +++ b/datafusion/functions/src/datetime/mod.rs @@ -21,8 +21,7 @@ use std::sync::Arc; use arrow::array::timezone::Tz; use arrow::datatypes::ArrowTimestampType; -use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, - Second}; +use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second}; use chrono::{DateTime, MappedLocalTime, Offset, TimeDelta, TimeZone, Utc}; use datafusion_common::{exec_err, internal_datafusion_err, Result}; use std::ops::Add; From 1ff4726819cad5ae49def8fadee016ffbf226c13 Mon Sep 17 00:00:00 2001 From: sriram Date: Sat, 29 Nov 2025 20:13:19 +0530 Subject: [PATCH 4/9] refactoring and setting volatility to stable --- datafusion/functions/src/datetime/date_part.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index f724b9becc116..8aeefcc5061be 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -407,11 +407,6 @@ fn seconds_as_i32(array: &dyn Array, unit: TimeUnit) -> Result { } } -/// Invoke [`date_part`] on an `array` (e.g. Timestamp) and convert the -/// result to a total number of seconds, milliseconds, microseconds or -/// nanoseconds -/// -/// Given epoch return f64, this is a duplicated function to optimize for f64 type // Converts seconds to f64 with the specified time unit. // Used for Interval and Duration types that need floating-point precision. fn seconds(array: &dyn Array, unit: TimeUnit) -> Result { From 7ae1503cec5aa356c7ac0450ddee8fd8cb9a7615 Mon Sep 17 00:00:00 2001 From: sriram Date: Wed, 3 Dec 2025 22:47:14 +0530 Subject: [PATCH 5/9] minor refactoring and test addition --- .../functions/src/datetime/date_part.rs | 58 ++++++++----------- datafusion/functions/src/datetime/mod.rs | 57 +----------------- .../sqllogictest/test_files/extract_tz.slt | 21 ++++++- 3 files changed, 45 insertions(+), 91 deletions(-) diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index 8aeefcc5061be..4b6edee652505 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -22,23 +22,21 @@ use std::sync::Arc; use arrow::array::timezone::Tz; use arrow::array::{Array, ArrayRef, Float64Array, Int32Array, PrimitiveBuilder}; use arrow::compute::kernels::cast_utils::IntervalUnit; -use arrow::compute::{DatePart, binary, date_part}; +use arrow::compute::{binary, date_part, DatePart}; use arrow::datatypes::DataType::{ Date32, Date64, Duration, Interval, Time32, Time64, Timestamp, }; use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second}; - use arrow::datatypes::{ ArrowTimestampType, DataType, Field, FieldRef, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, }; use datafusion_common::cast::as_primitive_array; -use datafusion_common::types::{NativeType, logical_date}; +use datafusion_common::types::{logical_date, NativeType}; use super::adjust_to_local_time; use datafusion_common::{ - Result, ScalarValue, cast::{ as_date32_array, as_date64_array, as_int32_array, as_time32_millisecond_array, as_time32_second_array, as_time64_microsecond_array, as_time64_nanosecond_array, @@ -48,6 +46,7 @@ use datafusion_common::{ exec_err, internal_err, not_impl_err, types::logical_string, utils::take_function_args, + Result, ScalarValue, }; use datafusion_expr::{ ColumnarValue, Documentation, ReturnFieldArgs, ScalarUDFImpl, Signature, @@ -65,22 +64,21 @@ use datafusion_macros::user_doc; name = "part", description = r#"Part of the date to return. The following date parts are supported: - - - year - - quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in) - - month - - week (week of the year) - - day (day of the month) - - hour - - minute - - second - - millisecond - - microsecond - - nanosecond - - dow (day of the week where Sunday is 0) - - doy (day of the year) - - epoch (seconds since Unix epoch) - - isodow (day of the week where Monday is 0) + - year + - quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in) + - month + - week (week of the year) + - day (day of the month) + - hour + - minute + - second + - millisecond + - microsecond + - nanosecond + - dow (day of the week where Sunday is 0) + - doy (day of the year) + - epoch (seconds since Unix epoch) + - isodow (day of the week where Monday is 0) "# ), argument( @@ -131,7 +129,7 @@ impl DatePartFunc { Coercion::new_exact(TypeSignatureClass::Duration), ]), ], - Volatility::Immutable, + Volatility::Stable, ), aliases: vec![String::from("datepart"), String::from("extract")], } @@ -192,8 +190,8 @@ impl ScalarUDFImpl for DatePartFunc { v } else { return exec_err!( -"First argument of `DATE_PART` must be non-null scalar Utf8" -); + "First argument of `DATE_PART` must be non-null scalar Utf8" + ); }; let is_scalar = matches!(array, ColumnarValue::Scalar(_)); @@ -273,7 +271,7 @@ impl ScalarUDFImpl for DatePartFunc { // using IntervalUnit here means we hand off all the work of supporting plurals (like "seconds") // and synonyms ( like "ms,msec,msecond,millisecond") to Arrow let arr = if let Ok(interval_unit) = IntervalUnit::from_str(part_trim) { - match interval_unit { + let extracted = match interval_unit { IntervalUnit::Year => date_part(array.as_ref(), DatePart::Year)?, IntervalUnit::Month => date_part(array.as_ref(), DatePart::Month)?, IntervalUnit::Week => date_part(array.as_ref(), DatePart::Week)?, @@ -284,9 +282,10 @@ impl ScalarUDFImpl for DatePartFunc { IntervalUnit::Millisecond => seconds_as_i32(array.as_ref(), Millisecond)?, IntervalUnit::Microsecond => seconds_as_i32(array.as_ref(), Microsecond)?, IntervalUnit::Nanosecond => seconds_as_i32(array.as_ref(), Nanosecond)?, - // century and decade are not supported by `DatePart`, although they are supported in postgres _ => return exec_err!("Date part '{part}' not supported"), - } + }; + + extracted } else { // special cases that can be extracted (in postgres) but are not interval units match part_trim.to_lowercase().as_str() { @@ -334,11 +333,9 @@ fn adjust_timestamp_array( } fn is_epoch(part: &str) -> bool { - let part = part_normalization(part); matches!(part.to_lowercase().as_str(), "epoch") } -// Try to remove quote if exist, if the quote is invalid, return original string and let the downstream function handle the error // Try to remove quote if exist, if the quote is invalid, return original string // and let the downstream function handle the error. fn part_normalization(part: &str) -> &str { @@ -347,9 +344,6 @@ fn part_normalization(part: &str) -> &str { .unwrap_or(part) } -/// Invoke [`date_part`] on an `array` (e.g. Timestamp) and convert the -/// result to a total number of seconds, milliseconds, microseconds or -/// nanoseconds fn interpret_session_timezone(tz_str: &str) -> Result { match tz_str.parse::() { Ok(tz) => Ok(tz), @@ -379,7 +373,6 @@ fn seconds_as_i32(array: &dyn Array, unit: TimeUnit) -> Result { }; let secs = date_part(array, DatePart::Second)?; - // This assumes array is primitive and not a dictionary let secs = as_int32_array(secs.as_ref())?; let subsecs = date_part(array, DatePart::Nanosecond)?; let subsecs = as_int32_array(subsecs.as_ref())?; @@ -417,7 +410,6 @@ fn seconds(array: &dyn Array, unit: TimeUnit) -> Result { Nanosecond => 1_000_000_000_f64, }; let secs = date_part(array, DatePart::Second)?; - // This assumes array is primitive and not a dictionary let secs = as_int32_array(secs.as_ref())?; let subsecs = date_part(array, DatePart::Nanosecond)?; let subsecs = as_int32_array(subsecs.as_ref())?; diff --git a/datafusion/functions/src/datetime/mod.rs b/datafusion/functions/src/datetime/mod.rs index acbac50ca3da8..4f3e45d761c34 100644 --- a/datafusion/functions/src/datetime/mod.rs +++ b/datafusion/functions/src/datetime/mod.rs @@ -19,16 +19,10 @@ use std::sync::Arc; -use arrow::array::timezone::Tz; -use arrow::datatypes::ArrowTimestampType; -use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second}; -use chrono::{DateTime, MappedLocalTime, Offset, TimeDelta, TimeZone, Utc}; -use datafusion_common::{exec_err, internal_datafusion_err, Result}; -use std::ops::Add; - use datafusion_expr::ScalarUDF; pub mod common; +pub use common::adjust_to_local_time; pub mod current_date; pub mod current_time; pub mod date_bin; @@ -46,55 +40,6 @@ pub mod to_time; pub mod to_timestamp; pub mod to_unixtime; -// Adjusts a timestamp to local time by applying the timezone offset. -pub fn adjust_to_local_time(ts: i64, tz: Tz) -> Result { - fn convert_timestamp(ts: i64, converter: F) -> Result> - where - F: Fn(i64) -> MappedLocalTime>, - { - match converter(ts) { - MappedLocalTime::Ambiguous(earliest, latest) => exec_err!( - "Ambiguous timestamp. Do you mean {:?} or {:?}", - earliest, - latest - ), - MappedLocalTime::None => exec_err!( - "The local time does not exist because there is a gap in the local time." - ), - MappedLocalTime::Single(date_time) => Ok(date_time), - } - } - - let date_time = match T::UNIT { - Nanosecond => Utc.timestamp_nanos(ts), - Microsecond => convert_timestamp(ts, |ts| Utc.timestamp_micros(ts))?, - Millisecond => convert_timestamp(ts, |ts| Utc.timestamp_millis_opt(ts))?, - Second => convert_timestamp(ts, |ts| Utc.timestamp_opt(ts, 0))?, - }; - - let offset_seconds: i64 = tz - .offset_from_utc_datetime(&date_time.naive_utc()) - .fix() - .local_minus_utc() as i64; - - let adjusted_date_time = date_time.add( - TimeDelta::try_seconds(offset_seconds) - .ok_or_else(|| internal_datafusion_err!("Offset seconds should be less than i64::MAX / 1_000 or greater than -i64::MAX / 1_000"))?, - ); - - // convert back to i64 - match T::UNIT { - Nanosecond => adjusted_date_time.timestamp_nanos_opt().ok_or_else(|| { - internal_datafusion_err!( - "Failed to convert DateTime to timestamp in nanosecond. This error may occur if the date is out of range. The supported date ranges are between 1677-09-21T00:12:43.145224192 and 2262-04-11T23:47:16.854775807" - ) - }), - Microsecond => Ok(adjusted_date_time.timestamp_micros()), - Millisecond => Ok(adjusted_date_time.timestamp_millis()), - Second => Ok(adjusted_date_time.timestamp()), - } -} - // create UDFs make_udf_function!(current_date::CurrentDateFunc, current_date); make_udf_function!(current_time::CurrentTimeFunc, current_time); diff --git a/datafusion/sqllogictest/test_files/extract_tz.slt b/datafusion/sqllogictest/test_files/extract_tz.slt index 010129ecb8090..e0dc37e6965d8 100644 --- a/datafusion/sqllogictest/test_files/extract_tz.slt +++ b/datafusion/sqllogictest/test_files/extract_tz.slt @@ -127,8 +127,7 @@ statement ok SET datafusion.execution.time_zone = 'Asia/Kolkata'; query IIII -SELECT -EXTRACT(HOUR FROM TIMESTAMP '2025-11-22 15:30:45'), +SELECT EXTRACT(HOUR FROM TIMESTAMP '2025-11-22 15:30:45'), EXTRACT(MINUTE FROM TIMESTAMP '2025-11-22 15:30:45'), EXTRACT(DOW FROM TIMESTAMP '2025-11-22 00:00:00'), EXTRACT(SECOND FROM TIMESTAMP '2024-01-01 03:05:59'); @@ -156,3 +155,21 @@ query I SELECT EXTRACT(HOUR FROM TIMESTAMP '2025-01-15 10:00:00'); ---- 5 + +statement ok +SET datafusion.execution.time_zone = '-03:30'; + +query II +SELECT EXTRACT(MINUTE FROM TIMESTAMP '2023-10-30 10:45:30'), +EXTRACT(SECOND FROM TIMESTAMP '2023-10-30 10:45:30'); +---- +15 30 + +statement ok +SET datafusion.execution.time_zone = 'America/St_Johns'; + +query II +SELECT EXTRACT(MINUTE FROM TIMESTAMP '2023-10-30 10:45:30'), +EXTRACT(SECOND FROM TIMESTAMP '2023-10-30 10:45:30'); +---- +15 30 From e34d403d415723c4dc6dd1a27e7d573f5b90c9fe Mon Sep 17 00:00:00 2001 From: sriram Date: Thu, 18 Dec 2025 01:31:11 +0530 Subject: [PATCH 6/9] resolve conflicts --- datafusion/functions/src/datetime/date_part.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index 4b6edee652505..54accaf60b6d8 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -33,7 +33,7 @@ use arrow::datatypes::{ }; use datafusion_common::cast::as_primitive_array; -use datafusion_common::types::{logical_date, NativeType}; +use datafusion_common::types::{NativeType, logical_date}; use super::adjust_to_local_time; use datafusion_common::{ @@ -271,7 +271,7 @@ impl ScalarUDFImpl for DatePartFunc { // using IntervalUnit here means we hand off all the work of supporting plurals (like "seconds") // and synonyms ( like "ms,msec,msecond,millisecond") to Arrow let arr = if let Ok(interval_unit) = IntervalUnit::from_str(part_trim) { - let extracted = match interval_unit { + match interval_unit { IntervalUnit::Year => date_part(array.as_ref(), DatePart::Year)?, IntervalUnit::Month => date_part(array.as_ref(), DatePart::Month)?, IntervalUnit::Week => date_part(array.as_ref(), DatePart::Week)?, @@ -283,9 +283,7 @@ impl ScalarUDFImpl for DatePartFunc { IntervalUnit::Microsecond => seconds_as_i32(array.as_ref(), Microsecond)?, IntervalUnit::Nanosecond => seconds_as_i32(array.as_ref(), Nanosecond)?, _ => return exec_err!("Date part '{part}' not supported"), - }; - - extracted + } } else { // special cases that can be extracted (in postgres) but are not interval units match part_trim.to_lowercase().as_str() { From b4d206ec13c590e345cc08f3f261183448f57346 Mon Sep 17 00:00:00 2001 From: sriram Date: Tue, 13 Jan 2026 00:24:34 +0530 Subject: [PATCH 7/9] resolve conflicts --- datafusion/functions/src/datetime/common.rs | 31 ++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/datafusion/functions/src/datetime/common.rs b/datafusion/functions/src/datetime/common.rs index 2db64beafa9b7..0fcf2e72d56f8 100644 --- a/datafusion/functions/src/datetime/common.rs +++ b/datafusion/functions/src/datetime/common.rs @@ -35,12 +35,41 @@ use datafusion_common::{ internal_datafusion_err, unwrap_or_internal_err, }; use datafusion_expr::ColumnarValue; +use std::ops::Add; /// Error message if nanosecond conversion request beyond supported interval const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804"; static UTC: LazyLock = LazyLock::new(|| "UTC".parse().expect("UTC is always valid")); +/// Converts a string representation of a date‑time into a timestamp expressed in +/// nanoseconds since the Unix epoch. +/// +/// This helper is a thin wrapper around the more general `string_to_datetime` +/// function. It accepts an optional `timezone` which, if `None`, defaults to +/// Coordinated Universal Time (UTC). The string `s` must contain a valid +/// date‑time format that can be parsed by the underlying chrono parser. +/// +/// # Return Value +/// +/// * `Ok(i64)` – The number of nanoseconds since `1970‑01‑01T00:00:00Z`. +/// * `Err(DataFusionError)` – If the string cannot be parsed, the parsed +/// value is out of range (between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804) +/// or the parsed value does not correspond to an unambiguous time. +pub(crate) fn string_to_timestamp_nanos_with_timezone( + timezone: &Option, + s: &str, +) -> Result { + let tz = timezone.as_ref().unwrap_or(&UTC); + let dt = string_to_datetime(tz, s)?; + let parsed = dt + .timestamp_nanos_opt() + .ok_or_else(|| exec_datafusion_err!("{ERR_NANOSECONDS_NOT_SUPPORTED}"))?; + + Ok(parsed) +} +static UTC: LazyLock = LazyLock::new(|| "UTC".parse().expect("UTC is always valid")); + /// Converts a string representation of a date‑time into a timestamp expressed in /// nanoseconds since the Unix epoch. /// @@ -453,7 +482,7 @@ where } other => exec_err!( "Unsupported data type {other:?} for function substr,\ - expected Utf8View, Utf8 or LargeUtf8." + expected Utf8View, Utf8 or LargeUtf8." ), }, other => exec_err!( From 8123ac6e9b44eac7501d151c20328b95cec3abab Mon Sep 17 00:00:00 2001 From: sriram Date: Tue, 13 Jan 2026 01:32:23 +0530 Subject: [PATCH 8/9] resolve conflicts --- datafusion/functions/src/datetime/common.rs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/datafusion/functions/src/datetime/common.rs b/datafusion/functions/src/datetime/common.rs index 0fcf2e72d56f8..d34175ecc9f25 100644 --- a/datafusion/functions/src/datetime/common.rs +++ b/datafusion/functions/src/datetime/common.rs @@ -23,12 +23,14 @@ use arrow::array::{ StringArrayType, StringViewArray, }; use arrow::compute::DecimalCast; -use arrow::compute::kernels::cast_utils::string_to_datetime; -use arrow::datatypes::{DataType, TimeUnit}; +use arrow::compute::kernels::cast_utils::{ + string_to_datetime, string_to_timestamp_nanos, +}; +use arrow::datatypes::{ArrowTimestampType, DataType, TimeUnit}; use arrow_buffer::ArrowNativeType; use chrono::LocalResult::Single; use chrono::format::{Parsed, StrftimeItems, parse}; -use chrono::{DateTime, TimeZone, Utc}; +use chrono::{DateTime, MappedLocalTime, TimeDelta, TimeZone, Utc}; use datafusion_common::cast::as_generic_string_array; use datafusion_common::{ DataFusionError, Result, ScalarValue, exec_datafusion_err, exec_err, @@ -481,8 +483,7 @@ where ) } other => exec_err!( - "Unsupported data type {other:?} for function substr,\ - expected Utf8View, Utf8 or LargeUtf8." + "Unsupported data type {other:?} for function substr, expected Utf8View, Utf8 or LargeUtf8." ), }, other => exec_err!( @@ -516,12 +517,14 @@ where DataType::Utf8View => Ok(a.as_string_view().value(pos)), DataType::LargeUtf8 => Ok(a.as_string::().value(pos)), DataType::Utf8 => Ok(a.as_string::().value(pos)), - other => exec_err!("Unexpected type encountered '{other}'"), + other => exec_err!("Unexpected type encountered '{}'", other), }, ColumnarValue::Scalar(s) => match s.try_as_str() { Some(Some(v)) => Ok(v), Some(None) => continue, // null string - None => exec_err!("Unexpected scalar type encountered '{s}'"), + None => { + exec_err!("Unexpected scalar type encountered '{}'", s) + } }, }?; @@ -569,6 +572,6 @@ fn scalar_value(dt: &DataType, r: Option) -> Result { TimeUnit::Microsecond => Ok(ScalarValue::TimestampMicrosecond(r, tz.clone())), TimeUnit::Nanosecond => Ok(ScalarValue::TimestampNanosecond(r, tz.clone())), }, - t => Err(internal_datafusion_err!("Unsupported data type: {t:?}")), + t => Err(internal_datafusion_err!("Unsupported data type: {:?}", t)), } } From b008a2a4f6267003b8e4d547ba6c45d4aca834c3 Mon Sep 17 00:00:00 2001 From: sriram Date: Wed, 14 Jan 2026 00:07:43 +0530 Subject: [PATCH 9/9] resolve conflicts (ii) --- datafusion/functions/src/datetime/common.rs | 86 +++++++++++++------ .../functions/src/datetime/date_part.rs | 4 +- .../functions/src/datetime/to_local_time.rs | 8 +- 3 files changed, 67 insertions(+), 31 deletions(-) diff --git a/datafusion/functions/src/datetime/common.rs b/datafusion/functions/src/datetime/common.rs index d34175ecc9f25..cb2c67b51bb93 100644 --- a/datafusion/functions/src/datetime/common.rs +++ b/datafusion/functions/src/datetime/common.rs @@ -42,34 +42,70 @@ use std::ops::Add; /// Error message if nanosecond conversion request beyond supported interval const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804"; -static UTC: LazyLock = LazyLock::new(|| "UTC".parse().expect("UTC is always valid")); +pub fn adjust_to_local_time(ts: i64, tz: Tz) -> Result { + fn convert_timestamp(ts: i64, converter: F) -> Result> + where + F: Fn(i64) -> MappedLocalTime>, + { + match converter(ts) { + MappedLocalTime::Ambiguous(earliest, latest) => exec_err!( + "Ambiguous timestamp. Do you mean {:?} or {:?}", + earliest, + latest + ), + MappedLocalTime::None => exec_err!( + "The local time does not exist because there is a gap in the local time." + ), + Single(date_time) => Ok(date_time), + } + } -/// Converts a string representation of a date‑time into a timestamp expressed in -/// nanoseconds since the Unix epoch. -/// -/// This helper is a thin wrapper around the more general `string_to_datetime` -/// function. It accepts an optional `timezone` which, if `None`, defaults to -/// Coordinated Universal Time (UTC). The string `s` must contain a valid -/// date‑time format that can be parsed by the underlying chrono parser. -/// -/// # Return Value -/// -/// * `Ok(i64)` – The number of nanoseconds since `1970‑01‑01T00:00:00Z`. -/// * `Err(DataFusionError)` – If the string cannot be parsed, the parsed -/// value is out of range (between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804) -/// or the parsed value does not correspond to an unambiguous time. -pub(crate) fn string_to_timestamp_nanos_with_timezone( - timezone: &Option, - s: &str, -) -> Result { - let tz = timezone.as_ref().unwrap_or(&UTC); - let dt = string_to_datetime(tz, s)?; - let parsed = dt - .timestamp_nanos_opt() - .ok_or_else(|| exec_datafusion_err!("{ERR_NANOSECONDS_NOT_SUPPORTED}"))?; + let date_time = match T::UNIT { + TimeUnit::Nanosecond => Utc.timestamp_nanos(ts), + TimeUnit::Microsecond => convert_timestamp(ts, |ts| Utc.timestamp_micros(ts))?, + TimeUnit::Millisecond => convert_timestamp(ts, |ts| Utc.timestamp_millis_opt(ts))?, + TimeUnit::Second => convert_timestamp(ts, |ts| Utc.timestamp_opt(ts, 0))?, + }; - Ok(parsed) + // Get the timezone offset for this datetime + let tz_offset = tz.offset_from_utc_datetime(&date_time.naive_utc()); + // Convert offset to seconds - offset is formatted like "+01:00" or "-05:00" + let offset_str = format!("{tz_offset}"); + let offset_seconds: i64 = if let Some(stripped) = offset_str.strip_prefix('-') { + let parts: Vec<&str> = stripped.split(':').collect(); + let hours: i64 = parts.first().and_then(|s| s.parse().ok()).unwrap_or(0); + let mins: i64 = parts.get(1).and_then(|s| s.parse().ok()).unwrap_or(0); + -((hours * 3600) + (mins * 60)) + } else { + let parts: Vec<&str> = offset_str.split(':').collect(); + let hours: i64 = parts.first().and_then(|s| s.parse().ok()).unwrap_or(0); + let mins: i64 = parts.get(1).and_then(|s| s.parse().ok()).unwrap_or(0); + (hours * 3600) + (mins * 60) + }; + + let adjusted_date_time = date_time.add( + TimeDelta::try_seconds(offset_seconds) + .ok_or_else(|| internal_datafusion_err!("Offset seconds should be less than i64::MAX / 1_000 or greater than -i64::MAX / 1_000"))?, + ); + + // convert back to i64 + match T::UNIT { + TimeUnit::Nanosecond => adjusted_date_time.timestamp_nanos_opt().ok_or_else(|| { + internal_datafusion_err!( + "Failed to convert DateTime to timestamp in nanosecond. This error may occur if the date is out of range. The supported date ranges are between 1677-09-21T00:12:43.145224192 and 2262-04-11T23:47:16.854775807" + ) + }), + TimeUnit::Microsecond => Ok(adjusted_date_time.timestamp_micros()), + TimeUnit::Millisecond => Ok(adjusted_date_time.timestamp_millis()), + TimeUnit::Second => Ok(adjusted_date_time.timestamp()), + } +} + +/// Calls string_to_timestamp_nanos and converts the error type +pub(crate) fn string_to_timestamp_nanos_shim(s: &str) -> Result { + string_to_timestamp_nanos(s).map_err(|e| e.into()) } + static UTC: LazyLock = LazyLock::new(|| "UTC".parse().expect("UTC is always valid")); /// Converts a string representation of a date‑time into a timestamp expressed in diff --git a/datafusion/functions/src/datetime/date_part.rs b/datafusion/functions/src/datetime/date_part.rs index 54accaf60b6d8..6e3c38402df37 100644 --- a/datafusion/functions/src/datetime/date_part.rs +++ b/datafusion/functions/src/datetime/date_part.rs @@ -22,7 +22,7 @@ use std::sync::Arc; use arrow::array::timezone::Tz; use arrow::array::{Array, ArrayRef, Float64Array, Int32Array, PrimitiveBuilder}; use arrow::compute::kernels::cast_utils::IntervalUnit; -use arrow::compute::{binary, date_part, DatePart}; +use arrow::compute::{DatePart, binary, date_part}; use arrow::datatypes::DataType::{ Date32, Date64, Duration, Interval, Time32, Time64, Timestamp, }; @@ -37,6 +37,7 @@ use datafusion_common::types::{NativeType, logical_date}; use super::adjust_to_local_time; use datafusion_common::{ + Result, ScalarValue, cast::{ as_date32_array, as_date64_array, as_int32_array, as_time32_millisecond_array, as_time32_second_array, as_time64_microsecond_array, as_time64_nanosecond_array, @@ -46,7 +47,6 @@ use datafusion_common::{ exec_err, internal_err, not_impl_err, types::logical_string, utils::take_function_args, - Result, ScalarValue, }; use datafusion_expr::{ ColumnarValue, Documentation, ReturnFieldArgs, ScalarUDFImpl, Signature, diff --git a/datafusion/functions/src/datetime/to_local_time.rs b/datafusion/functions/src/datetime/to_local_time.rs index 185b63cdb975f..178714c78bd37 100644 --- a/datafusion/functions/src/datetime/to_local_time.rs +++ b/datafusion/functions/src/datetime/to_local_time.rs @@ -29,7 +29,7 @@ use arrow::datatypes::{ use crate::datetime::adjust_to_local_time; use datafusion_common::cast::as_primitive_array; -use datafusion_common::{internal_err, utils::take_function_args, Result, ScalarValue}; +use datafusion_common::{Result, ScalarValue, internal_err, utils::take_function_args}; use datafusion_expr::{ Coercion, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignatureClass, Volatility, @@ -188,7 +188,7 @@ fn to_local_time(time_value: &ColumnarValue) -> Result { dt => { return internal_err!( "to_local_time function requires timestamp argument, got {dt}" - ) + ); } }; @@ -326,12 +326,12 @@ mod tests { use super::ToLocalTimeFunc; use crate::datetime::adjust_to_local_time; - use arrow::array::{types::TimestampNanosecondType, Array, TimestampNanosecondArray}; + use arrow::array::{Array, TimestampNanosecondArray, types::TimestampNanosecondType}; use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos; use arrow::datatypes::{DataType, Field, TimeUnit}; use chrono::NaiveDateTime; - use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; + use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl}; #[test]