From 947ca52910ede7443efe04aaf21550f06ade7a0e Mon Sep 17 00:00:00 2001 From: Haresh Khanna Date: Mon, 10 Nov 2025 17:13:00 +0000 Subject: [PATCH 1/4] Add timezone to date_trunc fast path --- datafusion/functions/src/datetime/date_trunc.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datafusion/functions/src/datetime/date_trunc.rs b/datafusion/functions/src/datetime/date_trunc.rs index 913e6217af82..45e15f1456c4 100644 --- a/datafusion/functions/src/datetime/date_trunc.rs +++ b/datafusion/functions/src/datetime/date_trunc.rs @@ -276,6 +276,7 @@ impl ScalarUDFImpl for DateTruncFunc { T::UNIT, array, granularity, + tz_opt.clone(), )?; return Ok(ColumnarValue::Array(result)); } @@ -522,6 +523,7 @@ fn general_date_trunc_array_fine_granularity( tu: TimeUnit, array: &PrimitiveArray<T>, granularity: DateTruncGranularity, + tz_opt: Option<Arc<str>>, ) -> Result<ArrayRef> { let unit = match (tu, granularity) { (Second, DateTruncGranularity::Minute) => NonZeroI64::new(60), @@ -556,7 +558,8 @@ fn general_date_trunc_array_fine_granularity( .iter() .map(|v| *v - i64::rem_euclid(*v, unit)), array.nulls().cloned(), - ); + ) + .with_timezone_opt(tz_opt); Ok(Arc::new(array)) } else { // truncate to the same or smaller unit From 01733d615b60d2c436f86f8d4ff4e74fdeeaea76 Mon Sep 17 00:00:00 2001 From: Haresh Khanna Date: Mon, 10 Nov 2025 17:37:35 +0000 Subject: [PATCH 2/4] Adds tests --- .../functions/src/datetime/date_trunc.rs | 175 ++++++++++++++++++ 1 file changed, 175 insertions(+) diff --git a/datafusion/functions/src/datetime/date_trunc.rs b/datafusion/functions/src/datetime/date_trunc.rs index 45e15f1456c4..2ca83d5a6fdf 100644 --- a/datafusion/functions/src/datetime/date_trunc.rs +++ b/datafusion/functions/src/datetime/date_trunc.rs @@ -1097,4 +1097,179 @@ mod tests { } }); } + + #[test] + fn test_date_trunc_fine_granularity_timezones() { + let cases = [ + // Test "second" granularity + ( + vec![ + "2020-09-08T13:42:29.190855Z", + "2020-09-08T13:42:30.500000Z", + 
"2020-09-08T13:42:31.999999Z", + ], + Some("+00".into()), + "second", + vec![ + "2020-09-08T13:42:29.000000Z", + "2020-09-08T13:42:30.000000Z", + "2020-09-08T13:42:31.000000Z", + ], + ), + ( + vec![ + "2020-09-08T13:42:29.190855+05", + "2020-09-08T13:42:30.500000+05", + "2020-09-08T13:42:31.999999+05", + ], + Some("+05".into()), + "second", + vec![ + "2020-09-08T13:42:29.000000+05", + "2020-09-08T13:42:30.000000+05", + "2020-09-08T13:42:31.000000+05", + ], + ), + ( + vec![ + "2020-09-08T13:42:29.190855Z", + "2020-09-08T13:42:30.500000Z", + "2020-09-08T13:42:31.999999Z", + ], + Some("Europe/Berlin".into()), + "second", + vec![ + "2020-09-08T13:42:29.000000Z", + "2020-09-08T13:42:30.000000Z", + "2020-09-08T13:42:31.000000Z", + ], + ), + // Test "minute" granularity + ( + vec![ + "2020-09-08T13:42:29.190855Z", + "2020-09-08T13:43:30.500000Z", + "2020-09-08T13:44:31.999999Z", + ], + Some("+00".into()), + "minute", + vec![ + "2020-09-08T13:42:00.000000Z", + "2020-09-08T13:43:00.000000Z", + "2020-09-08T13:44:00.000000Z", + ], + ), + ( + vec![ + "2020-09-08T13:42:29.190855+08", + "2020-09-08T13:43:30.500000+08", + "2020-09-08T13:44:31.999999+08", + ], + Some("+08".into()), + "minute", + vec![ + "2020-09-08T13:42:00.000000+08", + "2020-09-08T13:43:00.000000+08", + "2020-09-08T13:44:00.000000+08", + ], + ), + ( + vec![ + "2020-09-08T13:42:29.190855Z", + "2020-09-08T13:43:30.500000Z", + "2020-09-08T13:44:31.999999Z", + ], + Some("America/Sao_Paulo".into()), + "minute", + vec![ + "2020-09-08T13:42:00.000000Z", + "2020-09-08T13:43:00.000000Z", + "2020-09-08T13:44:00.000000Z", + ], + ), + // Test with None (no timezone) + ( + vec![ + "2020-09-08T13:42:29.190855Z", + "2020-09-08T13:43:30.500000Z", + "2020-09-08T13:44:31.999999Z", + ], + None, + "minute", + vec![ + "2020-09-08T13:42:00.000000Z", + "2020-09-08T13:43:00.000000Z", + "2020-09-08T13:44:00.000000Z", + ], + ), + // Test millisecond granularity + ( + vec![ + "2020-09-08T13:42:29.190855Z", + "2020-09-08T13:42:29.191999Z", 
+ "2020-09-08T13:42:29.192500Z", + ], + Some("Asia/Kolkata".into()), + "millisecond", + vec![ + "2020-09-08T19:12:29.190000+05:30", + "2020-09-08T19:12:29.191000+05:30", + "2020-09-08T19:12:29.192000+05:30", + ], + ), + ]; + + cases + .iter() + .for_each(|(original, tz_opt, granularity, expected)| { + let input = original + .iter() + .map(|s| Some(string_to_timestamp_nanos(s).unwrap())) + .collect::<TimestampNanosecondArray>() + .with_timezone_opt(tz_opt.clone()); + let right = expected + .iter() + .map(|s| Some(string_to_timestamp_nanos(s).unwrap())) + .collect::<TimestampNanosecondArray>() + .with_timezone_opt(tz_opt.clone()); + let batch_len = input.len(); + let arg_fields = vec![ + Field::new("a", DataType::Utf8, false).into(), + Field::new("b", input.data_type().clone(), false).into(), + ]; + let args = datafusion_expr::ScalarFunctionArgs { + args: vec![ + ColumnarValue::Scalar(ScalarValue::from(*granularity)), + ColumnarValue::Array(Arc::new(input)), + ], + arg_fields, + number_rows: batch_len, + return_field: Field::new( + "f", + DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()), + true, + ) + .into(), + config_options: Arc::new(ConfigOptions::default()), + }; + let result = DateTruncFunc::new().invoke_with_args(args).unwrap(); + if let ColumnarValue::Array(result) = result { + assert_eq!( + result.data_type(), + &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()), + "Failed for granularity: {}, timezone: {:?}", + granularity, + tz_opt + ); + let left = as_primitive_array::<TimestampNanosecondType>(&result); + assert_eq!( + left, &right, + "Failed for granularity: {}, timezone: {:?}", + granularity, tz_opt + ); + } else { + panic!("unexpected column type"); + } + }); + } } From d1f3bd8f2f1cb44dd1eeb872873892fbfced464b Mon Sep 17 00:00:00 2001 From: Haresh Khanna Date: Mon, 10 Nov 2025 18:10:27 +0000 Subject: [PATCH 3/4] Clippy fixes --- datafusion/functions/src/datetime/date_trunc.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/datafusion/functions/src/datetime/date_trunc.rs 
b/datafusion/functions/src/datetime/date_trunc.rs index 2ca83d5a6fdf..c8376cf84415 100644 --- a/datafusion/functions/src/datetime/date_trunc.rs +++ b/datafusion/functions/src/datetime/date_trunc.rs @@ -1257,15 +1257,12 @@ mod tests { assert_eq!( result.data_type(), &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()), - "Failed for granularity: {}, timezone: {:?}", - granularity, - tz_opt + "Failed for granularity: {granularity}, timezone: {tz_opt:?}" ); let left = as_primitive_array::<TimestampNanosecondType>(&result); assert_eq!( left, &right, - "Failed for granularity: {}, timezone: {:?}", - granularity, tz_opt + "Failed for granularity: {granularity}, timezone: {tz_opt:?}" ); } else { panic!("unexpected column type"); From 88a03b4b98b87338c0d690188fc32971ab33b39e Mon Sep 17 00:00:00 2001 From: Haresh Khanna Date: Tue, 11 Nov 2025 11:25:39 +0000 Subject: [PATCH 4/4] Adds a SLT test --- .../sqllogictest/test_files/timestamps.slt | 55 ++++++++++++------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index cdacad0fda0d..5c365b056d35 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -45,6 +45,9 @@ create table ts_data_millis as select arrow_cast(ts / 1000000, 'Timestamp(Millis statement ok create table ts_data_secs as select arrow_cast(ts / 1000000000, 'Timestamp(Second, None)') as ts, value from ts_data; +statement ok +create table ts_data_micros_kolkata as select arrow_cast(ts / 1000, 'Timestamp(Microsecond, Some("Asia/Kolkata"))') as ts, value from ts_data; + ########## ## Current date Tests @@ -1873,27 +1876,6 @@ true false true true -########## -## Common timestamp data -########## - -statement ok -drop table ts_data - -statement ok -drop table ts_data_nanos - -statement ok -drop table ts_data_micros - -statement ok -drop table ts_data_millis - -statement ok -drop table ts_data_secs - - - 
########## ## Timezone impact on scalar functions # @@ -3703,3 +3685,34 @@ SELECT FROM (SELECT CAST('2005-09-10 13:31:00 +02:00' AS timestamp with time zone) AS a) ---- Timestamp(ns, "+00") 2005-09-10T11:31:00Z 2005-09-10T11:31:00Z 2005-09-10T11:31:00Z 2005-09-10T11:31:00Z + +query P +SELECT + date_trunc('millisecond', ts) +FROM ts_data_micros_kolkata +---- +2020-09-08T19:12:29.190+05:30 +2020-09-08T18:12:29.190+05:30 +2020-09-08T17:12:29.190+05:30 + +########## +## Common timestamp data +########## + +statement ok +drop table ts_data + +statement ok +drop table ts_data_nanos + +statement ok +drop table ts_data_micros + +statement ok +drop table ts_data_millis + +statement ok +drop table ts_data_secs + +statement ok +drop table ts_data_micros_kolkata