diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index 40827f6bfb14..41a16f21c2ae 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -44,7 +44,6 @@ arrow = { version = "17.0.0", features = ["prettyprint"] } blake2 = { version = "^0.10.2", optional = true } blake3 = { version = "1.0", optional = true } chrono = { version = "0.4", default-features = false } -chronoutil = "0.2.3" datafusion-common = { path = "../common", version = "9.0.0" } datafusion-expr = { path = "../expr", version = "9.0.0" } datafusion-row = { path = "../row", version = "9.0.0" } diff --git a/datafusion/physical-expr/src/expressions/datetime.rs b/datafusion/physical-expr/src/expressions/datetime.rs index 65b6db595e64..fbbe77139067 100644 --- a/datafusion/physical-expr/src/expressions/datetime.rs +++ b/datafusion/physical-expr/src/expressions/datetime.rs @@ -15,11 +15,11 @@ // specific language governing permissions and limitations // under the License. +use crate::expressions::delta::shift_months; use crate::PhysicalExpr; use arrow::datatypes::{DataType, Schema}; use arrow::record_batch::RecordBatch; use chrono::{Duration, NaiveDate}; -use chronoutil::shift_months; use datafusion_common::Result; use datafusion_common::{DataFusionError, ScalarValue}; use datafusion_expr::{ColumnarValue, Operator}; diff --git a/datafusion/physical-expr/src/expressions/delta.rs b/datafusion/physical-expr/src/expressions/delta.rs new file mode 100644 index 000000000000..b7efdab0a48d --- /dev/null +++ b/datafusion/physical-expr/src/expressions/delta.rs @@ -0,0 +1,182 @@ +// MIT License +// +// Copyright (c) 2020-2022 Oliver Margetts +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +// Copied from chronoutil crate + +//! Contains utility functions for shifting Date objects. +use chrono::Datelike; + +/// Returns true if the year is a leap-year, as naively defined in the Gregorian calendar. +#[inline] +pub(crate) fn is_leap_year(year: i32) -> bool { + year % 4 == 0 && (year % 100 != 0 || year % 400 == 0) +} + +// If the day lies within the month, this function has no effect. Otherwise, it shifts +// day backwards to the final day of the month. +// XXX: No attempt is made to handle days outside the 1-31 range. +#[inline] +fn normalise_day(year: i32, month: u32, day: u32) -> u32 { + if day <= 28 { + day + } else if month == 2 { + 28 + is_leap_year(year) as u32 + } else if day == 31 && (month == 4 || month == 6 || month == 9 || month == 11) { + 30 + } else { + day + } +} + +/// Shift a date by the given number of months. +/// Ambiguous month-ends are shifted backwards as necessary. +pub(crate) fn shift_months(date: D, months: i32) -> D { + let mut year = date.year() + (date.month() as i32 + months) / 12; + let mut month = (date.month() as i32 + months) % 12; + let mut day = date.day(); + + if month < 1 { + year -= 1; + month += 12; + } + + day = normalise_day(year, month as u32, day); + + // This is slow but guaranteed to succeed (short of interger overflow) + if day <= 28 { + date.with_day(day) + .unwrap() + .with_month(month as u32) + .unwrap() + .with_year(year) + .unwrap() + } else { + date.with_day(1) + .unwrap() + .with_month(month as u32) + .unwrap() + .with_year(year) + .unwrap() + .with_day(day) + .unwrap() + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashSet; + + use chrono::naive::{NaiveDate, NaiveDateTime, NaiveTime}; + + use super::*; + + #[test] + fn test_leap_year_cases() { + let _leap_years: Vec = vec![ + 1904, 1908, 1912, 1916, 1920, 1924, 1928, 1932, 1936, 1940, 1944, 1948, 1952, + 1956, 1960, 1964, 1968, 1972, 1976, 1980, 1984, 1988, 1992, 1996, 2000, 2004, + 2008, 2012, 2016, 2020, + ]; + let leap_years_1900_to_2020: HashSet = _leap_years.into_iter().collect(); + + for year in 1900..2021 { + assert_eq!(is_leap_year(year), leap_years_1900_to_2020.contains(&year)) + } + } + + #[test] + fn test_shift_months() { + let base = NaiveDate::from_ymd(2020, 1, 31); + + assert_eq!(shift_months(base, 0), NaiveDate::from_ymd(2020, 1, 31)); + assert_eq!(shift_months(base, 1), NaiveDate::from_ymd(2020, 2, 29)); + assert_eq!(shift_months(base, 2), NaiveDate::from_ymd(2020, 3, 31)); + assert_eq!(shift_months(base, 3), NaiveDate::from_ymd(2020, 4, 30)); + assert_eq!(shift_months(base, 4), NaiveDate::from_ymd(2020, 5, 31)); + assert_eq!(shift_months(base, 5), NaiveDate::from_ymd(2020, 6, 30)); + assert_eq!(shift_months(base, 6), NaiveDate::from_ymd(2020, 7, 31)); + assert_eq!(shift_months(base, 7), NaiveDate::from_ymd(2020, 8, 31)); + assert_eq!(shift_months(base, 8), NaiveDate::from_ymd(2020, 9, 30)); + assert_eq!(shift_months(base, 9), NaiveDate::from_ymd(2020, 10, 31)); + assert_eq!(shift_months(base, 10), NaiveDate::from_ymd(2020, 11, 30)); + assert_eq!(shift_months(base, 11), NaiveDate::from_ymd(2020, 12, 31)); + assert_eq!(shift_months(base, 12), NaiveDate::from_ymd(2021, 1, 31)); + assert_eq!(shift_months(base, 13), NaiveDate::from_ymd(2021, 2, 28)); + + assert_eq!(shift_months(base, -1), NaiveDate::from_ymd(2019, 12, 31)); + assert_eq!(shift_months(base, -2), NaiveDate::from_ymd(2019, 11, 30)); + assert_eq!(shift_months(base, -3), NaiveDate::from_ymd(2019, 10, 31)); + assert_eq!(shift_months(base, -4), NaiveDate::from_ymd(2019, 9, 30)); + assert_eq!(shift_months(base, -5), NaiveDate::from_ymd(2019, 8, 31)); + assert_eq!(shift_months(base, -6), NaiveDate::from_ymd(2019, 7, 31)); + assert_eq!(shift_months(base, -7), NaiveDate::from_ymd(2019, 6, 30)); + assert_eq!(shift_months(base, -8), NaiveDate::from_ymd(2019, 5, 31)); + assert_eq!(shift_months(base, -9), NaiveDate::from_ymd(2019, 4, 30)); + assert_eq!(shift_months(base, -10), NaiveDate::from_ymd(2019, 3, 31)); + assert_eq!(shift_months(base, -11), NaiveDate::from_ymd(2019, 2, 28)); + assert_eq!(shift_months(base, -12), NaiveDate::from_ymd(2019, 1, 31)); + assert_eq!(shift_months(base, -13), NaiveDate::from_ymd(2018, 12, 31)); + + assert_eq!(shift_months(base, 1265), NaiveDate::from_ymd(2125, 6, 30)); + } + + #[test] + fn test_shift_months_with_overflow() { + let base = NaiveDate::from_ymd(2020, 12, 31); + + assert_eq!(shift_months(base, 0), base); + assert_eq!(shift_months(base, 1), NaiveDate::from_ymd(2021, 1, 31)); + assert_eq!(shift_months(base, 2), NaiveDate::from_ymd(2021, 2, 28)); + assert_eq!(shift_months(base, 12), NaiveDate::from_ymd(2021, 12, 31)); + assert_eq!(shift_months(base, 18), NaiveDate::from_ymd(2022, 6, 30)); + + assert_eq!(shift_months(base, -1), NaiveDate::from_ymd(2020, 11, 30)); + assert_eq!(shift_months(base, -2), NaiveDate::from_ymd(2020, 10, 31)); + assert_eq!(shift_months(base, -10), NaiveDate::from_ymd(2020, 2, 29)); + assert_eq!(shift_months(base, -12), NaiveDate::from_ymd(2019, 12, 31)); + assert_eq!(shift_months(base, -18), NaiveDate::from_ymd(2019, 6, 30)); + } + + #[test] + fn test_shift_months_datetime() { + let date = NaiveDate::from_ymd(2020, 1, 31); + let o_clock = NaiveTime::from_hms(1, 2, 3); + + let base = NaiveDateTime::new(date, o_clock); + + assert_eq!( + shift_months(base, 0).date(), + NaiveDate::from_ymd(2020, 1, 31) + ); + assert_eq!( + shift_months(base, 1).date(), + NaiveDate::from_ymd(2020, 2, 29) + ); + assert_eq!( + shift_months(base, 2).date(), + NaiveDate::from_ymd(2020, 3, 31) + ); + assert_eq!(shift_months(base, 0).time(), o_clock); + assert_eq!(shift_months(base, 1).time(), o_clock); + assert_eq!(shift_months(base, 2).time(), o_clock); + } +} diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index 8eb95f2bfafd..7a78f4603e87 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ b/datafusion/physical-expr/src/expressions/mod.rs @@ -23,6 +23,7 @@ mod case; mod cast; mod column; mod datetime; +mod delta; mod get_indexed_field; mod in_list; mod is_not_null;