From 1869363fe14ab91dd7a10a1e71e588df58930978 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Tue, 7 Mar 2023 10:37:51 +0300 Subject: [PATCH 01/23] first implementation and tests of timestamp subtraction --- datafusion/common/Cargo.toml | 1 + datafusion/common/src/scalar.rs | 709 ++++++++++++++++++++++++++++++-- 2 files changed, 684 insertions(+), 26 deletions(-) diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 96367f0c1959..fa0d0c71a60c 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -47,4 +47,5 @@ num_cpus = "1.13.0" object_store = { version = "0.5.4", default-features = false, optional = true } parquet = { version = "34.0.0", default-features = false, optional = true } pyo3 = { version = "0.18.0", optional = true } +rand = "0.8.4" sqlparser = "0.30" diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 2123281217ba..d9d7f3744b34 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -43,7 +43,9 @@ use arrow::{ DECIMAL128_MAX_PRECISION, }, }; -use chrono::{Datelike, Duration, NaiveDate, NaiveDateTime}; +use chrono::{ + DateTime, Datelike, Duration, FixedOffset, NaiveDate, NaiveDateTime, Timelike, +}; /// Represents a dynamically typed, nullable single value. /// This is the single-valued counter-part to arrow's [`Array`]. @@ -503,6 +505,29 @@ macro_rules! impl_op { (ScalarValue::Int8(lhs), ScalarValue::Int8(rhs)) => { primitive_op!(lhs, rhs, Int8, $OPERATION) } + ( + ScalarValue::TimestampNanosecond(Some(ts_lhs), tz_lhs), + ScalarValue::TimestampNanosecond(Some(ts_rhs), tz_rhs), + ) => Ok(ts_nanosec_sub_to_interval( + &ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs, + )?), + ( + ScalarValue::TimestampMicrosecond(Some(ts_lhs), tz_lhs), + ScalarValue::TimestampMicrosecond(Some(ts_rhs), tz_rhs), + ) => Ok(ts_microsec_sub_to_interval( + &ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs, + )?), + ( + ScalarValue::TimestampMillisecond(Some(ts_lhs), tz_lhs), + ScalarValue::TimestampMillisecond(Some(ts_rhs), tz_rhs), + ) => Ok(ts_millisec_sub_to_interval( + &ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs, + )?), + ( + ScalarValue::TimestampSecond(Some(ts_lhs), tz_lhs), + ScalarValue::TimestampSecond(Some(ts_rhs), tz_rhs), + ) => Ok(ts_sec_sub_to_interval(&ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs)?), + // Binary operations on arguments with different types: (ScalarValue::Date32(Some(days)), _) => { let value = date32_add(*days, $RHS, get_sign!($OPERATION))?; @@ -547,6 +572,272 @@ macro_rules! get_sign { }; } +// all timestamp variants are converted to nanosecond scale +#[inline] +fn ts_microsec_sub_to_interval( + lhs_ts: &i64, + rhs_ts: &i64, + lhs_tz: &Option, + rhs_tz: &Option, +) -> Result { + match (lhs_ts.checked_mul(1_000), rhs_ts.checked_mul(1_000)) { + (Some(lhs_ns), Some(rhs_ns)) => { + ts_nanosec_sub_to_interval(&lhs_ns, &rhs_ns, lhs_tz, rhs_tz) + } + (None, _) => Err(DataFusionError::NotImplemented(format!( + "overflow while conversion of {lhs_ts:?}" + ))), + (_, None) => Err(DataFusionError::NotImplemented(format!( + "overflow while conversion of {rhs_ts:?}" + ))), + } +} +#[inline] +fn ts_millisec_sub_to_interval( + lhs_ts: &i64, + rhs_ts: &i64, + lhs_tz: &Option, + rhs_tz: &Option, +) -> Result { + match (lhs_ts.checked_mul(1_000_000), rhs_ts.checked_mul(1_000_000)) { + (Some(lhs_ns), Some(rhs_ns)) => { + ts_nanosec_sub_to_interval(&lhs_ns, &rhs_ns, lhs_tz, rhs_tz) + } + (None, _) => Err(DataFusionError::NotImplemented(format!( + "overflow while conversion of {lhs_ts:?}" + ))), + (_, None) => Err(DataFusionError::NotImplemented(format!( + "overflow while conversion of {rhs_ts:?}" + ))), + } +} +#[inline] +fn ts_sec_sub_to_interval( + lhs_ts: &i64, + rhs_ts: &i64, + lhs_tz: &Option, + rhs_tz: &Option, +) -> Result { + match ( + lhs_ts.checked_mul(1_000_000_000), + rhs_ts.checked_mul(1_000_000_000), + ) { + (Some(lhs_ns), Some(rhs_ns)) => { + ts_nanosec_sub_to_interval(&lhs_ns, &rhs_ns, lhs_tz, rhs_tz) + } + (None, _) => Err(DataFusionError::NotImplemented(format!( + "overflow while conversion of {lhs_ts:?}" + ))), + (_, None) => Err(DataFusionError::NotImplemented(format!( + "overflow while conversion of {rhs_ts:?}" + ))), + } +} + +// Nanosecond-scale timestamps are subtracted to result in the narrowest interval variant. +// Interval variants are always consist of the same signed parts to handle comparison operations more wisely. +// For example, lhs < rhs => Interval(-, -, -), lhs > rhs => Interval(+, +, +) +fn ts_nanosec_sub_to_interval( + lhs_ts: &i64, + rhs_ts: &i64, + lhs_tz: &Option, + rhs_tz: &Option, +) -> Result { + // Conversion of integer and string-typed timestamps to NaiveDateTime objects + // Timezone offsets are added also if applicable. + let (naive_date_time2_unchecked, naive_date_time1_unchecked); + if let (Some(l), Some(r)) = (lhs_tz, rhs_tz) { + (naive_date_time2_unchecked, naive_date_time1_unchecked) = + integer_w_timezone_to_naive_datetime(lhs_ts, rhs_ts, l, r)?; + } else { + (naive_date_time2_unchecked, naive_date_time1_unchecked) = + integer_to_naive_datetime(lhs_ts, rhs_ts)?; + } + + // Check whether we will find a negative interval or not + let (naive_date_time2, naive_date_time1, sign) = + find_interval_sign(naive_date_time2_unchecked, naive_date_time1_unchecked); + + // Subtraction of datetimes. Details are inside the function. + let (mut months, mut months_residual) = + datetime_month_sub_with_rem(naive_date_time2, naive_date_time1)?; + + // Check whether we can return an IntervalYearMonth variant without losing information + match months_residual.num_nanoseconds() { + Some(value) => { + if value == 0 { + return Ok(ScalarValue::IntervalYearMonth(Some(sign * months))); + } + } + None => { + return Err(DataFusionError::NotImplemented(String::from( + "months_residual nanosec overflow", + ))) + } + } + + // If months_residual is negative, take one month from months and + // add it to months_residual to make it positive. + // To ensure the difference is positive all the time, we take the days + // of previous datetime's month. + if months_residual.num_nanoseconds() < Some(0) { + (months, months_residual) = + normalize_duration(&months, &months_residual, naive_date_time1)?; + } + + // Check whether we can return an IntervalDayTime variant without losing information + let months_residual_in_ns = months_residual.num_nanoseconds().unwrap(); + if months_residual_in_ns % 1_000_000 == 0 { + let delta_secs = naive_date_time2 + .signed_duration_since(naive_date_time1) + .num_milliseconds(); + + return Ok(ScalarValue::IntervalDayTime(Some( + IntervalDayTimeType::make_value( + sign * (delta_secs / 86_400_000) as i32, + sign * (delta_secs % 86_400_000) as i32, + ), + ))); + } + + Ok(ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value( + sign * months, + sign * (months_residual_in_ns / 86_400_000_000_000) as i32, + sign as i64 * (months_residual_in_ns % 86_400_000_000_000), + ), + ))) +} +#[inline] +fn integer_to_naive_datetime( + lhs_ts_ns: &i64, + rhs_ts_ns: &i64, +) -> Result<(NaiveDateTime, NaiveDateTime), DataFusionError> { + match ( + NaiveDateTime::from_timestamp_opt( + lhs_ts_ns / 1_000_000_000, + (lhs_ts_ns % 1_000_000_000) as u32, + ), + NaiveDateTime::from_timestamp_opt( + rhs_ts_ns / 1_000_000_000, + (rhs_ts_ns % 1_000_000_000) as u32, + ), + ) { + (Some(x), Some(y)) => Ok((x, y)), + (x, y) => Err(DataFusionError::NotImplemented(format!( + "timestamps {x:?} or {y:?} cannot be converted to datetimes", + ))), + } +} +#[inline] +fn integer_w_timezone_to_naive_datetime( + lhs_ts_ns: &i64, + rhs_ts_ns: &i64, + lhs_tz: &String, + rhs_tz: &String, +) -> Result<(NaiveDateTime, NaiveDateTime), DataFusionError> { + let (naive_lhs, naive_rhs) = integer_to_naive_datetime(lhs_ts_ns, rhs_ts_ns)?; + + match (parse_tz_to_offset(lhs_tz), parse_tz_to_offset(rhs_tz)) { + (Some(l), Some(r)) => Ok(( + DateTime::::from_utc(naive_lhs, l).naive_local(), + DateTime::::from_utc(naive_rhs, r).naive_local(), + )), + (_, _) => Ok((naive_lhs, naive_rhs)), + } +} +// This function parses as the format of "+HH:MM", for example, "+05:30" +#[inline] +fn parse_tz_to_offset(tz: &String) -> Option { + let sign = tz.chars().next().unwrap(); + let hours = tz[1..3].parse::().unwrap(); + let minutes = tz[4..6].parse::().unwrap(); + let timezone_offset = match sign { + '-' => FixedOffset::east_opt(hours * 3600 + minutes * 60).unwrap(), + '+' => FixedOffset::west_opt(hours * 3600 + minutes * 60).unwrap(), + _ => panic!("Invalid timezone string: {}", tz), + }; + Some(timezone_offset) +} +#[inline] +fn find_interval_sign( + ndt2: NaiveDateTime, + ndt1: NaiveDateTime, +) -> (NaiveDateTime, NaiveDateTime, i32) { + let sign; + if ndt2.timestamp_nanos() < ndt1.timestamp_nanos() { + sign = -1; + (ndt1, ndt2, sign) + } else { + sign = 1; + (ndt2, ndt1, sign) + } +} +#[inline] +fn datetime_month_sub_with_rem( + date_time2: NaiveDateTime, + date_time1: NaiveDateTime, +) -> Result<(i32, Duration), DataFusionError> { + // The difference of total months. Since this operation ignores the days of dates, + // that month count may be decreased by 1 in case of negative day count. + let months = (date_time2.year() - date_time1.year()) * 12 + + (date_time2.month() as i32 - date_time1.month() as i32); + + // months_residual is in the form of X secs, Y nanosecs. + // Y cannot be larger than 1_000_000_000, it is rounded up to seconds. + // The subtractions may overflow, so cast i64. + let months_residual = + Duration::days(date_time2.day() as i64 - date_time1.day() as i64) + + Duration::hours(date_time2.hour() as i64 - date_time1.hour() as i64) + + Duration::minutes(date_time2.minute() as i64 - date_time1.minute() as i64) + + Duration::seconds(date_time2.second() as i64 - date_time1.second() as i64) + + Duration::nanoseconds( + date_time2.nanosecond() as i64 - date_time1.nanosecond() as i64, + ); + + Ok((months, months_residual)) +} +#[inline] +fn normalize_duration( + months: &i32, + months_residual: &Duration, + at_month: NaiveDateTime, +) -> Result<(i32, Duration), DataFusionError> { + // For example, if the previous datetime's month and date is (Feb, 15), + // when we add the days of that month to month_residual + // variable, we need to add the february's day count. + // To ensure the difference is positive all the time, we take the days + // of previous datetime's month. + let added_days = + &Duration::days(days_in_month(at_month.year(), at_month.month())?.into()); + let months_residual_new = match months_residual.checked_add(added_days) { + Some(value) => value, + None => { + return Err(DataFusionError::NotImplemented(format!( + "normalize duration error, cannot add {added_days:?} days to {months_residual:?}", + ))) + } + }; + let months_new = months - 1; + Ok((months_new, months_residual_new)) +} +#[inline] +// It gives the day count of the corresponding month at that year. +fn days_in_month(year: i32, month: u32) -> Result { + if let Some(first_day) = NaiveDate::from_ymd_opt(year, month, 1) { + let last_day = first_day + .with_month(month + 1) + .unwrap_or_else(|| NaiveDate::from_ymd_opt(year + 1, 1, 1).unwrap()) + .pred_opt(); + if let Some(days) = last_day { + return Ok(days.day()); + } + } + Err(DataFusionError::NotImplemented(format!( + "invalid date parameters, year: {year:?} & month: {month:?}", + ))) +} + #[inline] pub fn date32_add(days: i32, scalar: &ScalarValue, sign: i32) -> Result { let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); @@ -1032,8 +1323,8 @@ impl ScalarValue { DataType::UInt16 => ScalarValue::UInt16(Some(0)), DataType::UInt32 => ScalarValue::UInt32(Some(0)), DataType::UInt64 => ScalarValue::UInt64(Some(0)), - DataType::Float32 => ScalarValue::Float32(Some(0.0)), - DataType::Float64 => ScalarValue::Float64(Some(0.0)), + DataType::Float32 => ScalarValue::UInt64(Some(0)), + DataType::Float64 => ScalarValue::UInt64(Some(0)), _ => { return Err(DataFusionError::NotImplemented(format!( "Can't create a zero scalar from data_type \"{datatype:?}\"" @@ -1296,7 +1587,7 @@ impl ScalarValue { } macro_rules! build_array_primitive_tz { - ($ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{ + ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{ { let array = scalars.map(|sv| { if let ScalarValue::$SCALAR_TY(v, _) = sv { @@ -1310,7 +1601,7 @@ impl ScalarValue { } }) .collect::>()?; - Arc::new(array.with_timezone_opt($TZ.clone())) + Arc::new(array) } }}; } @@ -1444,29 +1735,17 @@ impl ScalarValue { DataType::Time64(TimeUnit::Nanosecond) => { build_array_primitive!(Time64NanosecondArray, Time64Nanosecond) } - DataType::Timestamp(TimeUnit::Second, tz) => { - build_array_primitive_tz!(TimestampSecondArray, TimestampSecond, tz) + DataType::Timestamp(TimeUnit::Second, _) => { + build_array_primitive_tz!(TimestampSecondArray, TimestampSecond) } - DataType::Timestamp(TimeUnit::Millisecond, tz) => { - build_array_primitive_tz!( - TimestampMillisecondArray, - TimestampMillisecond, - tz - ) + DataType::Timestamp(TimeUnit::Millisecond, _) => { + build_array_primitive_tz!(TimestampMillisecondArray, TimestampMillisecond) } - DataType::Timestamp(TimeUnit::Microsecond, tz) => { - build_array_primitive_tz!( - TimestampMicrosecondArray, - TimestampMicrosecond, - tz - ) + DataType::Timestamp(TimeUnit::Microsecond, _) => { + build_array_primitive_tz!(TimestampMicrosecondArray, TimestampMicrosecond) } - DataType::Timestamp(TimeUnit::Nanosecond, tz) => { - build_array_primitive_tz!( - TimestampNanosecondArray, - TimestampNanosecond, - tz - ) + DataType::Timestamp(TimeUnit::Nanosecond, _) => { + build_array_primitive_tz!(TimestampNanosecondArray, TimestampNanosecond) } DataType::Interval(IntervalUnit::DayTime) => { build_array_primitive!(IntervalDayTimeArray, IntervalDayTime) @@ -2659,7 +2938,7 @@ impl TryFrom<&DataType> for ScalarValue { macro_rules! format_option { ($F:expr, $EXPR:expr) => {{ match $EXPR { - Some(e) => write!($F, "{e}"), + Some(e) => write!($F, "{}", e), None => write!($F, "NULL"), } }}; @@ -2887,6 +3166,7 @@ mod tests { use arrow::compute::kernels; use arrow::datatypes::ArrowPrimitiveType; + use rand::Rng; use crate::cast::{as_string_array, as_uint32_array, as_uint64_array}; use crate::from_slice::FromSlice; @@ -4430,4 +4710,381 @@ mod tests { assert!(distance.is_none()); } } + + #[test] + fn timestamp_op_tests() { + // positive interval, edge cases + let timestamps_next = new_timestamps_next(); + let timestamps_prev = new_timestamps_prev(); + let expected_results = new_expected_results(1); + for (idx, exp) in expected_results.iter().enumerate() { + assert_eq!( + *exp, + timestamps_next[idx].sub(×tamps_prev[idx]).unwrap() + ) + } + + // negative interval, edge cases + let timestamps_next = new_timestamps_prev(); + let timestamps_prev = new_timestamps_next(); + let expected_results = new_expected_results(-1); + for (idx, exp) in expected_results.iter().enumerate() { + assert_eq!( + *exp, + timestamps_next[idx].sub(×tamps_prev[idx]).unwrap() + ); + } + + // timestamp1 + (or -) interval = timestamp2 + // timestamp2 - timestamp1 (or timestamp1 - timestamp2) = interval ? + let sample_size = 100000; + let timestamps1 = get_random_timestamps1(sample_size); + let intervals = get_random_intervals(sample_size); + // ts(sec) + interval(ns) = ts(sec); however, + // ts(sec) - ts(sec) cannot be = interval(ns). Therefore, + // timestamps are more precise than intervals in tests. + let mut timestamp2: ScalarValue; + for (idx, ts1) in timestamps1.iter().enumerate() { + if idx % 2 == 0 { + timestamp2 = ts1.add(intervals[idx].clone()).unwrap(); + println!( + "{:?}, {:?}, {:?}, {:?}", + idx, timestamp2, ts1, intervals[idx] + ); + assert_eq!(intervals[idx], timestamp2.sub(ts1).unwrap()); + } else { + timestamp2 = ts1.sub(intervals[idx].clone()).unwrap(); + println!( + "{:?}, {:?}, {:?}, {:?}", + idx, timestamp2, ts1, intervals[idx] + ); + assert_eq!(intervals[idx], ts1.sub(timestamp2).unwrap()); + }; + } + } + + fn new_timestamps_next() -> Vec { + vec![ + // ScalarValue::TimestampNanosecond(Some(1308158638939668236), None), + ScalarValue::TimestampNanosecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_nano_opt(1, 0, 0, 000_000_000) + .unwrap() + .timestamp_nanos(), + ), + Some("+01:00".to_string()), + ), + ScalarValue::TimestampMicrosecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 1) + .unwrap() + .and_hms_micro_opt(2, 0, 0, 000_000) + .unwrap() + .timestamp_micros(), + ), + Some("+01:00".to_string()), + ), + ScalarValue::TimestampMillisecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 1) + .unwrap() + .and_hms_milli_opt(10, 10, 0, 000) + .unwrap() + .timestamp_millis(), + ), + Some("+10:10".to_string()), + ), + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 1) + .unwrap() + .and_hms_opt(0, 0, 0) + .unwrap() + .timestamp(), + ), + Some("-11:59".to_string()), + ), + ScalarValue::TimestampMillisecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 1) + .unwrap() + .and_hms_milli_opt(23, 58, 0, 250) + .unwrap() + .timestamp_millis(), + ), + Some("+11:59".to_string()), + ), + ScalarValue::TimestampMicrosecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 1) + .unwrap() + .and_hms_micro_opt(0, 0, 0, 15) + .unwrap() + .timestamp_micros(), + ), + None, + ), + ScalarValue::TimestampNanosecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 1) + .unwrap() + .and_hms_nano_opt(0, 0, 0, 22) + .unwrap() + .timestamp_nanos(), + ), + None, + ), + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 1) + .unwrap() + .and_hms_opt(0, 0, 0) + .unwrap() + .timestamp(), + ), + None, + ), + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2023, 12, 1) + .unwrap() + .and_hms_opt(0, 0, 0) + .unwrap() + .timestamp(), + ), + None, + ), + ] + } + + fn new_timestamps_prev() -> Vec { + vec![ + // ScalarValue::TimestampNanosecond(Some(1171521569027710670), None), + ScalarValue::TimestampNanosecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_nano_opt(0, 0, 0, 000_000_000) + .unwrap() + .timestamp_nanos(), + ), + Some("+00:00".to_string()), + ), + ScalarValue::TimestampMicrosecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_micro_opt(0, 0, 0, 000_000) + .unwrap() + .timestamp_micros(), + ), + Some("-01:00".to_string()), + ), + ScalarValue::TimestampMillisecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_milli_opt(1, 0, 0, 000) + .unwrap() + .timestamp_millis(), + ), + Some("+01:00".to_string()), + ), + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_opt(23, 58, 0) + .unwrap() + .timestamp(), + ), + Some("+11:59".to_string()), + ), + ScalarValue::TimestampMillisecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_milli_opt(0, 0, 0, 000) + .unwrap() + .timestamp_millis(), + ), + Some("-11:59".to_string()), + ), + ScalarValue::TimestampMicrosecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_micro_opt(0, 0, 0, 000_000) + .unwrap() + .timestamp_micros(), + ), + None, + ), + ScalarValue::TimestampNanosecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 31) + .unwrap() + .and_hms_nano_opt(0, 0, 0, 000_000_000) + .unwrap() + .timestamp_nanos(), + ), + None, + ), + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2021, 12, 30) + .unwrap() + .and_hms_opt(0, 0, 30) + .unwrap() + .timestamp(), + ), + None, + ), + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(1980, 11, 1) + .unwrap() + .and_hms_opt(0, 0, 0) + .unwrap() + .timestamp(), + ), + None, + ), + ] + } + + fn new_expected_results(sign: i32) -> Vec { + vec![ + // ScalarValue::IntervalMonthDayNano(Some(4040636288743990090004520869950)), + ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 0))), + ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + 0, + sign * 2, + ))), + ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + 0, + sign * 2, + ))), + ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + 0, + sign * 2, + ))), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + sign * 59, + sign * 250, + ))), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(sign * 2, 0, sign as i64 * 15_000), + )), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(sign, sign, sign as i64 * 22), + )), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + sign * 425, + sign * 86370000, + ))), + ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + sign * 43, + sign, + ))), + ] + } + + fn get_random_timestamps1(sample_size: u64) -> Vec { + let vector_size = sample_size; + let mut timestamp = vec![]; + let mut rng = rand::thread_rng(); + for i in 0..vector_size { + let year = rng.gen_range(1995..=2050); + let month = rng.gen_range(1..=12); + let day = rng.gen_range(1..=28); + let hour = rng.gen_range(0..=23); + let minute = rng.gen_range(0..=59); + let second = rng.gen_range(0..=59); + if i % 4 == 0 { + timestamp.push(ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(year, month, day) + .unwrap() + .and_hms_opt(hour, minute, second) + .unwrap() + .timestamp(), + ), + None, + )) + } else if i % 4 == 1 { + let millisec = rng.gen_range(0..=999); + timestamp.push(ScalarValue::TimestampMillisecond( + Some( + NaiveDate::from_ymd_opt(year, month, day) + .unwrap() + .and_hms_milli_opt(hour, minute, second, millisec) + .unwrap() + .timestamp_millis(), + ), + None, + )) + } else if i % 4 == 2 { + let microsec = rng.gen_range(0..=999_999); + timestamp.push(ScalarValue::TimestampMicrosecond( + Some( + NaiveDate::from_ymd_opt(year, month, day) + .unwrap() + .and_hms_micro_opt(hour, minute, second, microsec) + .unwrap() + .timestamp_micros(), + ), + None, + )) + } else if i % 4 == 3 { + let nanosec = rng.gen_range(0..=999_999_999); + timestamp.push(ScalarValue::TimestampNanosecond( + Some( + NaiveDate::from_ymd_opt(year, month, day) + .unwrap() + .and_hms_nano_opt(hour, minute, second, nanosec) + .unwrap() + .timestamp_nanos(), + ), + None, + )) + } + } + timestamp + } + + fn get_random_intervals(sample_size: u64) -> Vec { + let vector_size = sample_size; + let mut intervals = vec![]; + let mut rng = rand::thread_rng(); + for i in 0..vector_size { + if i % 3 == 2 && i % 4 == 3 { + let month = rng.gen_range(0..=100); + // there is an test issue for the days 28(29). + // for example, if we have an expected interval 2 months 28(29) days, + // the subtractor finds it as 3 months if the previous timestamp + // is at february. + let day = rng.gen_range(0..=27); + let nanosec = rng.gen_range(0..86_400_000_000_000); + intervals.push(ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(month, day, nanosec), + ))); + } else if i % 3 == 1 && i % 4 != 0 { + let day = rng.gen_range(0..=5000); + let millisec = rng.gen_range(0..86_400_000); + intervals.push(ScalarValue::IntervalDayTime(Some( + IntervalDayTimeType::make_value(day, millisec), + ))) + } else { + let year = rng.gen_range(0..=20); + let month = rng.gen_range(0..=50); + intervals.push(ScalarValue::IntervalYearMonth(Some( + IntervalYearMonthType::make_value(year, month), + ))) + } + } + intervals + } } From 2f0127832569f49cb3e1a2ed8e0a0fe7692cbc53 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Tue, 7 Mar 2023 15:18:19 +0300 Subject: [PATCH 02/23] improvement after review --- datafusion/common/src/scalar.rs | 122 ++++++++++++++++++-------------- 1 file changed, 69 insertions(+), 53 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index d9d7f3744b34..c98b0ff28f6d 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -645,14 +645,12 @@ fn ts_nanosec_sub_to_interval( ) -> Result { // Conversion of integer and string-typed timestamps to NaiveDateTime objects // Timezone offsets are added also if applicable. - let (naive_date_time2_unchecked, naive_date_time1_unchecked); - if let (Some(l), Some(r)) = (lhs_tz, rhs_tz) { - (naive_date_time2_unchecked, naive_date_time1_unchecked) = - integer_w_timezone_to_naive_datetime(lhs_ts, rhs_ts, l, r)?; - } else { - (naive_date_time2_unchecked, naive_date_time1_unchecked) = - integer_to_naive_datetime(lhs_ts, rhs_ts)?; - } + let (naive_date_time2_unchecked, naive_date_time1_unchecked) = + if let (Some(l), Some(r)) = (lhs_tz, rhs_tz) { + integer_w_timezone_to_naive_datetime(lhs_ts, rhs_ts, l, r)? + } else { + integer_to_naive_datetime(lhs_ts, rhs_ts)? + }; // Check whether we will find a negative interval or not let (naive_date_time2, naive_date_time1, sign) = @@ -662,18 +660,13 @@ fn ts_nanosec_sub_to_interval( let (mut months, mut months_residual) = datetime_month_sub_with_rem(naive_date_time2, naive_date_time1)?; + let err = || { + DataFusionError::NotImplemented(String::from("months_residual nanosec overflow")) + }; // Check whether we can return an IntervalYearMonth variant without losing information - match months_residual.num_nanoseconds() { - Some(value) => { - if value == 0 { - return Ok(ScalarValue::IntervalYearMonth(Some(sign * months))); - } - } - None => { - return Err(DataFusionError::NotImplemented(String::from( - "months_residual nanosec overflow", - ))) - } + let value = months_residual.num_nanoseconds().ok_or_else(err)?; + if value == 0 { + return Ok(ScalarValue::IntervalYearMonth(Some(sign * months))); } // If months_residual is negative, take one month from months and @@ -686,12 +679,12 @@ fn ts_nanosec_sub_to_interval( } // Check whether we can return an IntervalDayTime variant without losing information - let months_residual_in_ns = months_residual.num_nanoseconds().unwrap(); + let months_residual_in_ns = months_residual.num_nanoseconds().ok_or_else(err)?; if months_residual_in_ns % 1_000_000 == 0 { let delta_secs = naive_date_time2 .signed_duration_since(naive_date_time1) .num_milliseconds(); - + // 60 * 60 * 24 * 1000 = 86_400_000, number of millisecs in a day return Ok(ScalarValue::IntervalDayTime(Some( IntervalDayTimeType::make_value( sign * (delta_secs / 86_400_000) as i32, @@ -700,6 +693,7 @@ fn ts_nanosec_sub_to_interval( ))); } + // 60 * 60 * 24 * 1000 * 1000 * 1000 = 86_400_000_000_000, number of nanosecs in a day Ok(ScalarValue::IntervalMonthDayNano(Some( IntervalMonthDayNanoType::make_value( sign * months, @@ -739,7 +733,7 @@ fn integer_w_timezone_to_naive_datetime( let (naive_lhs, naive_rhs) = integer_to_naive_datetime(lhs_ts_ns, rhs_ts_ns)?; match (parse_tz_to_offset(lhs_tz), parse_tz_to_offset(rhs_tz)) { - (Some(l), Some(r)) => Ok(( + (Ok(l), Ok(r)) => Ok(( DateTime::::from_utc(naive_lhs, l).naive_local(), DateTime::::from_utc(naive_rhs, r).naive_local(), )), @@ -748,31 +742,39 @@ fn integer_w_timezone_to_naive_datetime( } // This function parses as the format of "+HH:MM", for example, "+05:30" #[inline] -fn parse_tz_to_offset(tz: &String) -> Option { - let sign = tz.chars().next().unwrap(); - let hours = tz[1..3].parse::().unwrap(); - let minutes = tz[4..6].parse::().unwrap(); +fn parse_tz_to_offset(tz: &String) -> Result { + let err_str = &String::from("error while parsing timezone"); + let err = || DataFusionError::NotImplemented(err_str.to_string()); + + let sign = tz.chars().next().ok_or_else(err)?; + let hours = tz[1..3] + .parse::() + .map_err(|_e| DataFusionError::NotImplemented(err_str.to_string()))?; + let minutes = tz[4..6] + .parse::() + .map_err(|_e| DataFusionError::NotImplemented(err_str.to_string()))?; let timezone_offset = match sign { - '-' => FixedOffset::east_opt(hours * 3600 + minutes * 60).unwrap(), - '+' => FixedOffset::west_opt(hours * 3600 + minutes * 60).unwrap(), - _ => panic!("Invalid timezone string: {}", tz), + '-' => FixedOffset::east_opt(hours * 3600 + minutes * 60).ok_or_else(err)?, + '+' => FixedOffset::west_opt(hours * 3600 + minutes * 60).ok_or_else(err)?, + _ => { + return Err(DataFusionError::NotImplemented(err_str.to_string())); + } }; - Some(timezone_offset) + Ok(timezone_offset) } #[inline] fn find_interval_sign( ndt2: NaiveDateTime, ndt1: NaiveDateTime, ) -> (NaiveDateTime, NaiveDateTime, i32) { - let sign; if ndt2.timestamp_nanos() < ndt1.timestamp_nanos() { - sign = -1; - (ndt1, ndt2, sign) + (ndt1, ndt2, -1) } else { - sign = 1; - (ndt2, ndt1, sign) + (ndt2, ndt1, 1) } } +// This function assumes 'date_time2' is greater than 'date_time1', +// therefore; resulting 'months' cannot be negative. #[inline] fn datetime_month_sub_with_rem( date_time2: NaiveDateTime, @@ -825,11 +827,13 @@ fn normalize_duration( // It gives the day count of the corresponding month at that year. fn days_in_month(year: i32, month: u32) -> Result { if let Some(first_day) = NaiveDate::from_ymd_opt(year, month, 1) { - let last_day = first_day - .with_month(month + 1) - .unwrap_or_else(|| NaiveDate::from_ymd_opt(year + 1, 1, 1).unwrap()) - .pred_opt(); - if let Some(days) = last_day { + let last_day = match first_day.with_month(month + 1) { + Some(day) => day, + None => NaiveDate::from_ymd_opt(year + 1, 1, 1).ok_or_else(|| { + DataFusionError::NotImplemented(format!("out-of-range year",)) + })?, + }; + if let Some(days) = last_day.pred_opt() { return Ok(days.day()); } } @@ -1323,8 +1327,8 @@ impl ScalarValue { DataType::UInt16 => ScalarValue::UInt16(Some(0)), DataType::UInt32 => ScalarValue::UInt32(Some(0)), DataType::UInt64 => ScalarValue::UInt64(Some(0)), - DataType::Float32 => ScalarValue::UInt64(Some(0)), - DataType::Float64 => ScalarValue::UInt64(Some(0)), + DataType::Float32 => ScalarValue::Float32(Some(0.0)), + DataType::Float64 => ScalarValue::Float64(Some(0.0)), _ => { return Err(DataFusionError::NotImplemented(format!( "Can't create a zero scalar from data_type \"{datatype:?}\"" @@ -1587,7 +1591,7 @@ impl ScalarValue { } macro_rules! build_array_primitive_tz { - ($ARRAY_TY:ident, $SCALAR_TY:ident) => {{ + ($ARRAY_TY:ident, $SCALAR_TY:ident, $TZ:expr) => {{ { let array = scalars.map(|sv| { if let ScalarValue::$SCALAR_TY(v, _) = sv { @@ -1601,7 +1605,7 @@ impl ScalarValue { } }) .collect::>()?; - Arc::new(array) + Arc::new(array.with_timezone_opt($TZ.clone())) } }}; } @@ -1735,17 +1739,29 @@ impl ScalarValue { DataType::Time64(TimeUnit::Nanosecond) => { build_array_primitive!(Time64NanosecondArray, Time64Nanosecond) } - DataType::Timestamp(TimeUnit::Second, _) => { - build_array_primitive_tz!(TimestampSecondArray, TimestampSecond) + DataType::Timestamp(TimeUnit::Second, tz) => { + build_array_primitive_tz!(TimestampSecondArray, TimestampSecond, tz) } - DataType::Timestamp(TimeUnit::Millisecond, _) => { - build_array_primitive_tz!(TimestampMillisecondArray, TimestampMillisecond) + DataType::Timestamp(TimeUnit::Millisecond, tz) => { + build_array_primitive_tz!( + TimestampMillisecondArray, + TimestampMillisecond, + tz + ) } - DataType::Timestamp(TimeUnit::Microsecond, _) => { - build_array_primitive_tz!(TimestampMicrosecondArray, TimestampMicrosecond) + DataType::Timestamp(TimeUnit::Microsecond, tz) => { + build_array_primitive_tz!( + TimestampMicrosecondArray, + TimestampMicrosecond, + tz + ) } - DataType::Timestamp(TimeUnit::Nanosecond, _) => { - build_array_primitive_tz!(TimestampNanosecondArray, TimestampNanosecond) + DataType::Timestamp(TimeUnit::Nanosecond, tz) => { + build_array_primitive_tz!( + TimestampNanosecondArray, + TimestampNanosecond, + tz + ) } DataType::Interval(IntervalUnit::DayTime) => { build_array_primitive!(IntervalDayTimeArray, IntervalDayTime) @@ -2938,7 +2954,7 @@ impl TryFrom<&DataType> for ScalarValue { macro_rules! format_option { ($F:expr, $EXPR:expr) => {{ match $EXPR { - Some(e) => write!($F, "{}", e), + Some(e) => write!($F, "{e}"), None => write!($F, "NULL"), } }}; From 806b4d3a4f9199ab8f69253fbb96e1bc2fb6712c Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Wed, 8 Mar 2023 13:25:25 +0300 Subject: [PATCH 03/23] postgre interval format option --- datafusion/common/src/scalar.rs | 184 +++++++++++++++++++++++++------- 1 file changed, 148 insertions(+), 36 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index c98b0ff28f6d..2e31b24811d4 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -634,10 +634,26 @@ fn ts_sec_sub_to_interval( } } +// This function will be removed once the result format is clarified. +fn ts_nanosec_sub_to_interval( + lhs_ts: &i64, + rhs_ts: &i64, + lhs_tz: &Option, + rhs_tz: &Option, +) -> Result { + let round_up_to_month = true; + + if round_up_to_month { + ts_nanosec_sub_to_interval_months(lhs_ts, rhs_ts, lhs_tz, rhs_tz) + } else { + ts_nanosec_sub_to_interval_days(lhs_ts, rhs_ts, lhs_tz, rhs_tz) + } +} + // Nanosecond-scale timestamps are subtracted to result in the narrowest interval variant. // Interval variants are always consist of the same signed parts to handle comparison operations more wisely. // For example, lhs < rhs => Interval(-, -, -), lhs > rhs => Interval(+, +, +) -fn ts_nanosec_sub_to_interval( +fn ts_nanosec_sub_to_interval_months( lhs_ts: &i64, rhs_ts: &i64, lhs_tz: &Option, @@ -702,6 +718,51 @@ fn ts_nanosec_sub_to_interval( ), ))) } + +// Nanosecond-scale timestamps are subtracted to result in the narrowest interval variant. +// Interval variants are always consist of the same signed parts to handle comparison operations more wisely. +// For example, lhs < rhs => Interval(-, -, -), lhs > rhs => Interval(+, +, +) +fn ts_nanosec_sub_to_interval_days( + lhs_ts: &i64, + rhs_ts: &i64, + lhs_tz: &Option, + rhs_tz: &Option, +) -> Result { + // Conversion of integer and string-typed timestamps to NaiveDateTime objects + // Timezone offsets are added also if applicable. + let (naive_date_time2_unchecked, naive_date_time1_unchecked) = + if let (Some(l), Some(r)) = (lhs_tz, rhs_tz) { + integer_w_timezone_to_naive_datetime(lhs_ts, rhs_ts, l, r)? + } else { + integer_to_naive_datetime(lhs_ts, rhs_ts)? + }; + + // Check whether we will find a negative interval or not + let (naive_date_time2, naive_date_time1, sign) = + find_interval_sign(naive_date_time2_unchecked, naive_date_time1_unchecked); + + // Subtraction of datetimes. Details are inside the function. + let duration_in_nanosec = datetime_day_sub(naive_date_time2, naive_date_time1)?; + + // Try to return in IntervalDayTime + if duration_in_nanosec % 1_000_000 == 0 { + return Ok(ScalarValue::IntervalDayTime(Some( + IntervalDayTimeType::make_value( + sign * (duration_in_nanosec / 86_400_000_000_000) as i32, + sign * ((duration_in_nanosec / 1_000_000) % 86_400_000) as i32, + ), + ))); + } + + // The last option IntervalMonthDayNano + Ok(ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value( + 0, + sign * (duration_in_nanosec / 86_400_000_000_000) as i32, + sign as i64 * (duration_in_nanosec % 86_400_000_000_000), + ), + ))) +} #[inline] fn integer_to_naive_datetime( lhs_ts_ns: &i64, @@ -727,8 +788,8 @@ fn integer_to_naive_datetime( fn integer_w_timezone_to_naive_datetime( lhs_ts_ns: &i64, rhs_ts_ns: &i64, - lhs_tz: &String, - rhs_tz: &String, + lhs_tz: &str, + rhs_tz: &str, ) -> Result<(NaiveDateTime, NaiveDateTime), DataFusionError> { let (naive_lhs, naive_rhs) = integer_to_naive_datetime(lhs_ts_ns, rhs_ts_ns)?; @@ -742,7 +803,7 @@ fn integer_w_timezone_to_naive_datetime( } // This function parses as the format of "+HH:MM", for example, "+05:30" #[inline] -fn parse_tz_to_offset(tz: &String) -> Result { +fn parse_tz_to_offset(tz: &str) -> Result { let err_str = &String::from("error while parsing timezone"); let err = || DataFusionError::NotImplemented(err_str.to_string()); @@ -800,6 +861,21 @@ fn datetime_month_sub_with_rem( Ok((months, months_residual)) } #[inline] +// This function assumes 'date_time2' is greater than 'date_time1', +// therefore; the result cannot be negative. +fn datetime_day_sub( + date_time2: NaiveDateTime, + date_time1: NaiveDateTime, +) -> Result { + // We directly take the difference of datetimes in nanosecond precision. + date_time2 + .signed_duration_since(date_time1) + .num_nanoseconds() + .ok_or(DataFusionError::NotImplemented(String::from( + "datetime subtraction overflow", + ))) +} +#[inline] fn normalize_duration( months: &i32, months_residual: &Duration, @@ -830,7 +906,7 @@ fn days_in_month(year: i32, month: u32) -> Result { let last_day = match first_day.with_month(month + 1) { Some(day) => day, None => NaiveDate::from_ymd_opt(year + 1, 1, 1).ok_or_else(|| { - DataFusionError::NotImplemented(format!("out-of-range year",)) + DataFusionError::NotImplemented(format!("out of range year: 1+{year}")) })?, }; if let Some(days) = last_day.pred_opt() { @@ -4729,10 +4805,15 @@ mod tests { #[test] fn timestamp_op_tests() { + let round_up_to_month = true; // positive interval, edge cases let timestamps_next = new_timestamps_next(); let timestamps_prev = new_timestamps_prev(); - let expected_results = new_expected_results(1); + let expected_results = if round_up_to_month { + new_expected_results_months(1) + } else { + new_expected_results_days(1) + }; for (idx, exp) in expected_results.iter().enumerate() { assert_eq!( *exp, @@ -4743,7 +4824,11 @@ mod tests { // negative interval, edge cases let timestamps_next = new_timestamps_prev(); let timestamps_prev = new_timestamps_next(); - let expected_results = new_expected_results(-1); + let expected_results = if round_up_to_month { + new_expected_results_months(-1) + } else { + new_expected_results_days(-1) + }; for (idx, exp) in expected_results.iter().enumerate() { assert_eq!( *exp, @@ -4751,37 +4836,31 @@ mod tests { ); } - // timestamp1 + (or -) interval = timestamp2 - // timestamp2 - timestamp1 (or timestamp1 - timestamp2) = interval ? - let sample_size = 100000; - let timestamps1 = get_random_timestamps1(sample_size); - let intervals = get_random_intervals(sample_size); - // ts(sec) + interval(ns) = ts(sec); however, - // ts(sec) - ts(sec) cannot be = interval(ns). Therefore, - // timestamps are more precise than intervals in tests. - let mut timestamp2: ScalarValue; - for (idx, ts1) in timestamps1.iter().enumerate() { - if idx % 2 == 0 { - timestamp2 = ts1.add(intervals[idx].clone()).unwrap(); - println!( - "{:?}, {:?}, {:?}, {:?}", - idx, timestamp2, ts1, intervals[idx] - ); - assert_eq!(intervals[idx], timestamp2.sub(ts1).unwrap()); - } else { - timestamp2 = ts1.sub(intervals[idx].clone()).unwrap(); - println!( - "{:?}, {:?}, {:?}, {:?}", - idx, timestamp2, ts1, intervals[idx] - ); - assert_eq!(intervals[idx], ts1.sub(timestamp2).unwrap()); - }; + // RANDOM-VALUED TESTS, these are not applicable for day format + if round_up_to_month { + // timestamp1 + (or -) interval = timestamp2 + // timestamp2 - timestamp1 (or timestamp1 - timestamp2) = interval ? + let sample_size = 100000; + let timestamps1 = get_random_timestamps1(sample_size); + let intervals = get_random_intervals(sample_size); + // ts(sec) + interval(ns) = ts(sec); however, + // ts(sec) - ts(sec) cannot be = interval(ns). Therefore, + // timestamps are more precise than intervals in tests. + let mut timestamp2: ScalarValue; + for (idx, ts1) in timestamps1.iter().enumerate() { + if idx % 2 == 0 { + timestamp2 = ts1.add(intervals[idx].clone()).unwrap(); + assert_eq!(intervals[idx], timestamp2.sub(ts1).unwrap()); + } else { + timestamp2 = ts1.sub(intervals[idx].clone()).unwrap(); + assert_eq!(intervals[idx], ts1.sub(timestamp2).unwrap()); + }; + } } } fn new_timestamps_next() -> Vec { vec![ - // ScalarValue::TimestampNanosecond(Some(1308158638939668236), None), ScalarValue::TimestampNanosecond( Some( NaiveDate::from_ymd_opt(2023, 1, 1) @@ -4877,7 +4956,6 @@ mod tests { fn new_timestamps_prev() -> Vec { vec![ - // ScalarValue::TimestampNanosecond(Some(1171521569027710670), None), ScalarValue::TimestampNanosecond( Some( NaiveDate::from_ymd_opt(2023, 1, 1) @@ -4971,9 +5049,8 @@ mod tests { ] } - fn new_expected_results(sign: i32) -> Vec { + fn new_expected_results_months(sign: i32) -> Vec { vec![ - // ScalarValue::IntervalMonthDayNano(Some(4040636288743990090004520869950)), ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 0))), ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( 0, @@ -5007,6 +5084,41 @@ mod tests { ))), ] } + fn new_expected_results_days(sign: i32) -> Vec { + vec![ + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(0, 0))), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + sign * 59, + 0, + ))), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + sign * 59, + 0, + ))), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + sign * 59, + 0, + ))), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + sign * 59, + sign * 250, + ))), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(0, sign * 59, sign as i64 * 15_000), + )), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(0, sign * 29, sign as i64 * 22), + )), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + sign * 425, + sign * 86370000, + ))), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + sign * 15735, + 0, + ))), + ] + } fn get_random_timestamps1(sample_size: u64) -> Vec { let vector_size = sample_size; From 708d7179d2d258b420de4f3c5e6c181e67f05d38 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Wed, 8 Mar 2023 14:41:00 +0300 Subject: [PATCH 04/23] random tests extended --- datafusion-cli/Cargo.lock | 89 +++++++++++++++++---------------- datafusion/common/src/scalar.rs | 77 ++++++++++++++++++++-------- 2 files changed, 101 insertions(+), 65 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 78847f39290d..02fc00c8c4cf 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -293,9 +293,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.64" +version = "0.1.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd7fce9ba8c3c042128ce72d8b2ddbf3a05747efb67ea0313c635e10bda47a2" +checksum = "b84f9ebcc6c1f5b8cb160f6990096a5c127f423fcb6e1ccc46c370cbdfb75dfc" dependencies = [ "proc-macro2", "quote", @@ -592,9 +592,9 @@ dependencies = [ [[package]] name = "csv" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af91f40b7355f82b0a891f50e70399475945bb0b0da4f1700ce60761c9d3e359" +checksum = "0b015497079b9a9d69c02ad25de6c0a6edef051ea6360a327d0bd05802ef64ad" dependencies = [ "csv-core", "itoa", @@ -613,9 +613,9 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.91" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86d3488e7665a7a483b57e25bdd90d0aeb2bc7608c8d0346acf2ad3f1caf1d62" +checksum = "9a140f260e6f3f79013b8bfc65e7ce630c9ab4388c6a89c71e07226f49487b72" dependencies = [ "cc", "cxxbridge-flags", @@ -625,9 +625,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.91" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48fcaf066a053a41a81dfb14d57d99738b767febb8b735c3016e469fac5da690" +checksum = "da6383f459341ea689374bf0a42979739dc421874f112ff26f829b8040b8e613" dependencies = [ "cc", "codespan-reporting", @@ -640,15 +640,15 @@ dependencies = [ [[package]] name = "cxxbridge-flags" -version = "1.0.91" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2ef98b8b717a829ca5603af80e1f9e2e48013ab227b68ef37872ef84ee479bf" +checksum = "90201c1a650e95ccff1c8c0bb5a343213bdd317c6e600a93075bca2eff54ec97" [[package]] name = "cxxbridge-macro" -version = "1.0.91" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "086c685979a698443656e5cf7856c95c642295a38599f12fb1ff76fb28d19892" +checksum = "0b75aed41bb2e6367cae39e6326ef817a851db13c13e4f3263714ca3cfb8de56" dependencies = [ "proc-macro2", "quote", @@ -742,6 +742,7 @@ dependencies = [ "num_cpus", "object_store", "parquet", + "rand", "sqlparser", ] @@ -1342,9 +1343,9 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "io-lifetimes" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1abeb7a0dd0f8181267ff8adc397075586500b81b28a73e8a0208b00fc170fb3" +checksum = "cfa919a82ea574332e2de6e74b4c36e74d41982b335080fa59d4ef31be20fdf3" dependencies = [ "libc", "windows-sys 0.45.0", @@ -1367,9 +1368,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440" +checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" [[package]] name = "jobserver" @@ -1812,9 +1813,9 @@ dependencies = [ [[package]] name = "paste" -version = "1.0.11" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d01a5bd0424d00070b0098dd17ebca6f961a959dead1dbcbbbc1d1cd8d3deeba" +checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79" [[package]] name = "percent-encoding" @@ -1824,9 +1825,9 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" [[package]] name = "petgraph" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5014253a1331579ce62aa67443b4a658c5e7dd03d4bc6d302b94474888143" +checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4" dependencies = [ "fixedbitset", "indexmap", @@ -2058,9 +2059,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.36.8" +version = "0.36.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f43abb88211988493c1abb44a70efa56ff0ce98f233b7b276146f1f3f7ba9644" +checksum = "fd5c6ff11fecd55b40746d1995a02f2eb375bf8c00d192d521ee09f42bef37bc" dependencies = [ "bitflags", "errno", @@ -2093,9 +2094,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.11" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5583e89e108996506031660fe09baa5011b9dd0341b89029313006d1fb508d70" +checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" [[package]] name = "rustyline" @@ -2122,9 +2123,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde" +checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" [[package]] name = "same-file" @@ -2143,9 +2144,9 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "scratch" -version = "1.0.3" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddccb15bcce173023b3fedd9436f882a0739b8dfb45e4f6b6002bee5929f61b2" +checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1" [[package]] name = "sct" @@ -2165,24 +2166,24 @@ checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a" [[package]] name = "seq-macro" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1685deded9b272198423bdbdb907d8519def2f26cf3699040e54e8c4fbd5c5ce" +checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc" [[package]] name = "serde" -version = "1.0.152" +version = "1.0.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" +checksum = "3a382c72b4ba118526e187430bb4963cd6d55051ebf13d9b25574d379cc98d20" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.152" +version = "1.0.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" +checksum = "1ef476a5790f0f6decbc66726b6e5d63680ed518283e64c7df415989d880954f" dependencies = [ "proc-macro2", "quote", @@ -2191,9 +2192,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.93" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cad406b69c91885b5107daf2c29572f6c8cdb3c66826821e286c533490c0bc76" +checksum = "1c533a59c9d8a93a09c6ab31f0fd5e5f4dd1b8fc9434804029839884765d04ea" dependencies = [ "itoa", "ryu", @@ -2268,9 +2269,9 @@ checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" [[package]] name = "socket2" -version = "0.4.7" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" +checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" dependencies = [ "libc", "winapi", @@ -2387,18 +2388,18 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "thiserror" -version = "1.0.38" +version = "1.0.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" +checksum = "a5ab016db510546d856297882807df8da66a16fb8c4101cb8b30054b0d5b2d9c" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.38" +version = "1.0.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" +checksum = "5420d42e90af0c38c3290abcca25b9b3bdf379fc9f55c528f53a269d9c9a267e" dependencies = [ "proc-macro2", "quote", @@ -2574,9 +2575,9 @@ checksum = "d54675592c1dbefd78cbd98db9bacd89886e1ca50692a0692baefffdeb92dd58" [[package]] name = "unicode-ident" -version = "1.0.6" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" +checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" [[package]] name = "unicode-normalization" diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 2e31b24811d4..0e69f02f00f0 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -4836,26 +4836,29 @@ mod tests { ); } - // RANDOM-VALUED TESTS, these are not applicable for day format - if round_up_to_month { - // timestamp1 + (or -) interval = timestamp2 - // timestamp2 - timestamp1 (or timestamp1 - timestamp2) = interval ? - let sample_size = 100000; - let timestamps1 = get_random_timestamps1(sample_size); - let intervals = get_random_intervals(sample_size); - // ts(sec) + interval(ns) = ts(sec); however, - // ts(sec) - ts(sec) cannot be = interval(ns). Therefore, - // timestamps are more precise than intervals in tests. - let mut timestamp2: ScalarValue; - for (idx, ts1) in timestamps1.iter().enumerate() { - if idx % 2 == 0 { - timestamp2 = ts1.add(intervals[idx].clone()).unwrap(); - assert_eq!(intervals[idx], timestamp2.sub(ts1).unwrap()); - } else { - timestamp2 = ts1.sub(intervals[idx].clone()).unwrap(); - assert_eq!(intervals[idx], ts1.sub(timestamp2).unwrap()); - }; - } + // RANDOM-VALUED TESTS + + // timestamp1 + (or -) interval = timestamp2 + // timestamp2 - timestamp1 (or timestamp1 - timestamp2) = interval ? + let sample_size = 100000; + let timestamps1 = get_random_timestamps1(sample_size); + let intervals = if round_up_to_month { + get_random_intervals_months(sample_size) + } else { + get_random_intervals_days(sample_size) + }; + // ts(sec) + interval(ns) = ts(sec); however, + // ts(sec) - ts(sec) cannot be = interval(ns). Therefore, + // timestamps are more precise than intervals in tests. + let mut timestamp2: ScalarValue; + for (idx, ts1) in timestamps1.iter().enumerate() { + if idx % 2 == 0 { + timestamp2 = ts1.add(intervals[idx].clone()).unwrap(); + assert_eq!(intervals[idx], timestamp2.sub(ts1).unwrap()); + } else { + timestamp2 = ts1.sub(intervals[idx].clone()).unwrap(); + assert_eq!(intervals[idx], ts1.sub(timestamp2).unwrap()); + }; } } @@ -5183,7 +5186,7 @@ mod tests { timestamp } - fn get_random_intervals(sample_size: u64) -> Vec { + fn get_random_intervals_months(sample_size: u64) -> Vec { let vector_size = sample_size; let mut intervals = vec![]; let mut rng = rand::thread_rng(); @@ -5215,4 +5218,36 @@ mod tests { } intervals } + fn get_random_intervals_days(sample_size: u64) -> Vec { + let vector_size = sample_size; + let mut intervals = vec![]; + let mut rng = rand::thread_rng(); + for i in 0..vector_size { + if i % 4 == 0 { + let days = rng.gen_range(0..=1000); + intervals.push(ScalarValue::IntervalDayTime(Some( + IntervalDayTimeType::make_value(days, 0), + ))) + } else if i % 4 == 1 { + let days = rng.gen_range(0..=1000); + let millis = rng.gen_range(0..=86_400_000); + intervals.push(ScalarValue::IntervalDayTime(Some( + IntervalDayTimeType::make_value(days, millis), + ))) + } else if i % 4 == 2 { + let days = rng.gen_range(0..=1000); + let millis = rng.gen_range(0..=86_400_000); + intervals.push(ScalarValue::IntervalDayTime(Some( + IntervalDayTimeType::make_value(days, millis), + ))) + } else { + let days = rng.gen_range(0..=1000); + let nanosecs = rng.gen_range(1..86_400_000_000_000); + intervals.push(ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(0, days, nanosecs), + ))); + } + } + intervals + } } From c5bacbe2612eb5c3d91cceba30fde39b2b2d05d4 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Wed, 8 Mar 2023 14:57:38 +0300 Subject: [PATCH 05/23] corrections after review --- datafusion/common/src/scalar.rs | 135 ++++++++++++++------------------ 1 file changed, 59 insertions(+), 76 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 0e69f02f00f0..c4a3e73d8728 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -580,17 +580,11 @@ fn ts_microsec_sub_to_interval( lhs_tz: &Option, rhs_tz: &Option, ) -> Result { - match (lhs_ts.checked_mul(1_000), rhs_ts.checked_mul(1_000)) { - (Some(lhs_ns), Some(rhs_ns)) => { - ts_nanosec_sub_to_interval(&lhs_ns, &rhs_ns, lhs_tz, rhs_tz) - } - (None, _) => Err(DataFusionError::NotImplemented(format!( - "overflow while conversion of {lhs_ts:?}" - ))), - (_, None) => Err(DataFusionError::NotImplemented(format!( - "overflow while conversion of {rhs_ts:?}" - ))), - } + let err_msg = "Overflow while conversion to microseconds"; + let err = || DataFusionError::Execution(err_msg.to_string()); + let lhs_ns = lhs_ts.checked_mul(1_000).ok_or_else(err)?; + let rhs_ns = rhs_ts.checked_mul(1_000).ok_or_else(err)?; + ts_nanosec_sub_to_interval(&lhs_ns, &rhs_ns, lhs_tz, rhs_tz) } #[inline] fn ts_millisec_sub_to_interval( @@ -599,17 +593,11 @@ fn ts_millisec_sub_to_interval( lhs_tz: &Option, rhs_tz: &Option, ) -> Result { - match (lhs_ts.checked_mul(1_000_000), rhs_ts.checked_mul(1_000_000)) { - (Some(lhs_ns), Some(rhs_ns)) => { - ts_nanosec_sub_to_interval(&lhs_ns, &rhs_ns, lhs_tz, rhs_tz) - } - (None, _) => Err(DataFusionError::NotImplemented(format!( - "overflow while conversion of {lhs_ts:?}" - ))), - (_, None) => Err(DataFusionError::NotImplemented(format!( - "overflow while conversion of {rhs_ts:?}" - ))), - } + let err_msg = "Overflow while conversion to microseconds"; + let err = || DataFusionError::Execution(err_msg.to_string()); + let lhs_ns = lhs_ts.checked_mul(1_000_000).ok_or_else(err)?; + let rhs_ns = rhs_ts.checked_mul(1_000_000).ok_or_else(err)?; + ts_nanosec_sub_to_interval(&lhs_ns, &rhs_ns, lhs_tz, rhs_tz) } #[inline] fn ts_sec_sub_to_interval( @@ -618,20 +606,11 @@ fn ts_sec_sub_to_interval( lhs_tz: &Option, rhs_tz: &Option, ) -> Result { - match ( - lhs_ts.checked_mul(1_000_000_000), - rhs_ts.checked_mul(1_000_000_000), - ) { - (Some(lhs_ns), Some(rhs_ns)) => { - ts_nanosec_sub_to_interval(&lhs_ns, &rhs_ns, lhs_tz, rhs_tz) - } - (None, _) => Err(DataFusionError::NotImplemented(format!( - "overflow while conversion of {lhs_ts:?}" - ))), - (_, None) => Err(DataFusionError::NotImplemented(format!( - "overflow while conversion of {rhs_ts:?}" - ))), - } + let err_msg = "Overflow while conversion to microseconds"; + let err = || DataFusionError::Execution(err_msg.to_string()); + let lhs_ns = lhs_ts.checked_mul(1_000_000_000).ok_or_else(err)?; + let rhs_ns = rhs_ts.checked_mul(1_000_000_000).ok_or_else(err)?; + ts_nanosec_sub_to_interval(&lhs_ns, &rhs_ns, lhs_tz, rhs_tz) } // This function will be removed once the result format is clarified. @@ -641,7 +620,7 @@ fn ts_nanosec_sub_to_interval( lhs_tz: &Option, rhs_tz: &Option, ) -> Result { - let round_up_to_month = true; + let round_up_to_month = false; if round_up_to_month { ts_nanosec_sub_to_interval_months(lhs_ts, rhs_ts, lhs_tz, rhs_tz) @@ -676,9 +655,8 @@ fn ts_nanosec_sub_to_interval_months( let (mut months, mut months_residual) = datetime_month_sub_with_rem(naive_date_time2, naive_date_time1)?; - let err = || { - DataFusionError::NotImplemented(String::from("months_residual nanosec overflow")) - }; + let err = + || DataFusionError::Execution(String::from("months_residual nanosec overflow")); // Check whether we can return an IntervalYearMonth variant without losing information let value = months_residual.num_nanoseconds().ok_or_else(err)?; if value == 0 { @@ -779,7 +757,7 @@ fn integer_to_naive_datetime( ), ) { (Some(x), Some(y)) => Ok((x, y)), - (x, y) => Err(DataFusionError::NotImplemented(format!( + (x, y) => Err(DataFusionError::Execution(format!( "timestamps {x:?} or {y:?} cannot be converted to datetimes", ))), } @@ -805,20 +783,20 @@ fn integer_w_timezone_to_naive_datetime( #[inline] fn parse_tz_to_offset(tz: &str) -> Result { let err_str = &String::from("error while parsing timezone"); - let err = || DataFusionError::NotImplemented(err_str.to_string()); + let err = || DataFusionError::Execution(err_str.to_string()); let sign = tz.chars().next().ok_or_else(err)?; let hours = tz[1..3] .parse::() - .map_err(|_e| DataFusionError::NotImplemented(err_str.to_string()))?; + .map_err(|_e| DataFusionError::Execution(err_str.to_string()))?; let minutes = tz[4..6] .parse::() - .map_err(|_e| DataFusionError::NotImplemented(err_str.to_string()))?; + .map_err(|_e| DataFusionError::Execution(err_str.to_string()))?; let timezone_offset = match sign { '-' => FixedOffset::east_opt(hours * 3600 + minutes * 60).ok_or_else(err)?, '+' => FixedOffset::west_opt(hours * 3600 + minutes * 60).ok_or_else(err)?, _ => { - return Err(DataFusionError::NotImplemented(err_str.to_string())); + return Err(DataFusionError::Execution(err_str.to_string())); } }; Ok(timezone_offset) @@ -871,7 +849,7 @@ fn datetime_day_sub( date_time2 .signed_duration_since(date_time1) .num_nanoseconds() - .ok_or(DataFusionError::NotImplemented(String::from( + .ok_or(DataFusionError::Execution(String::from( "datetime subtraction overflow", ))) } @@ -891,7 +869,7 @@ fn normalize_duration( let months_residual_new = match months_residual.checked_add(added_days) { Some(value) => value, None => { - return Err(DataFusionError::NotImplemented(format!( + return Err(DataFusionError::Execution(format!( "normalize duration error, cannot add {added_days:?} days to {months_residual:?}", ))) } @@ -906,14 +884,14 @@ fn days_in_month(year: i32, month: u32) -> Result { let last_day = match first_day.with_month(month + 1) { Some(day) => day, None => NaiveDate::from_ymd_opt(year + 1, 1, 1).ok_or_else(|| { - DataFusionError::NotImplemented(format!("out of range year: 1+{year}")) + DataFusionError::Execution(format!("out of range year: 1+{year}")) })?, }; if let Some(days) = last_day.pred_opt() { return Ok(days.day()); } } - Err(DataFusionError::NotImplemented(format!( + Err(DataFusionError::Execution(format!( "invalid date parameters, year: {year:?} & month: {month:?}", ))) } @@ -4805,39 +4783,44 @@ mod tests { #[test] fn timestamp_op_tests() { - let round_up_to_month = true; + let round_up_to_month = false; // positive interval, edge cases - let timestamps_next = new_timestamps_next(); - let timestamps_prev = new_timestamps_prev(); + let vec_timestamps_next = timestamps_next(); + let vec_timestamps_prev = timestamps_prev(); let expected_results = if round_up_to_month { - new_expected_results_months(1) + expected_results_months(1) } else { - new_expected_results_days(1) + expected_results_days(1) }; for (idx, exp) in expected_results.iter().enumerate() { assert_eq!( *exp, - timestamps_next[idx].sub(×tamps_prev[idx]).unwrap() + vec_timestamps_next[idx] + .sub(&vec_timestamps_prev[idx]) + .unwrap() ) } // negative interval, edge cases - let timestamps_next = new_timestamps_prev(); - let timestamps_prev = new_timestamps_next(); + let vec_timestamps_next = timestamps_prev(); + let vec_timestamps_prev = timestamps_next(); let expected_results = if round_up_to_month { - new_expected_results_months(-1) + expected_results_months(-1) } else { - new_expected_results_days(-1) + expected_results_days(-1) }; for (idx, exp) in expected_results.iter().enumerate() { assert_eq!( *exp, - timestamps_next[idx].sub(×tamps_prev[idx]).unwrap() + vec_timestamps_next[idx] + .sub(&vec_timestamps_prev[idx]) + .unwrap() ); } - - // RANDOM-VALUED TESTS - + } + #[test] + fn timestamp_op_random_tests() { + let round_up_to_month = false; // timestamp1 + (or -) interval = timestamp2 // timestamp2 - timestamp1 (or timestamp1 - timestamp2) = interval ? let sample_size = 100000; @@ -4862,7 +4845,7 @@ mod tests { } } - fn new_timestamps_next() -> Vec { + fn timestamps_next() -> Vec { vec![ ScalarValue::TimestampNanosecond( Some( @@ -4957,7 +4940,7 @@ mod tests { ] } - fn new_timestamps_prev() -> Vec { + fn timestamps_prev() -> Vec { vec![ ScalarValue::TimestampNanosecond( Some( @@ -5052,7 +5035,7 @@ mod tests { ] } - fn new_expected_results_months(sign: i32) -> Vec { + fn expected_results_months(sign: i32) -> Vec { vec![ ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 0))), ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( @@ -5087,7 +5070,7 @@ mod tests { ))), ] } - fn new_expected_results_days(sign: i32) -> Vec { + fn expected_results_days(sign: i32) -> Vec { vec![ ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(0, 0))), ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( @@ -5192,8 +5175,8 @@ mod tests { let mut rng = rand::thread_rng(); for i in 0..vector_size { if i % 3 == 2 && i % 4 == 3 { - let month = rng.gen_range(0..=100); - // there is an test issue for the days 28(29). + let month = rng.gen_range(0..100); + // there is a complex test issue for the days 28(29). // for example, if we have an expected interval 2 months 28(29) days, // the subtractor finds it as 3 months if the previous timestamp // is at february. @@ -5203,14 +5186,14 @@ mod tests { IntervalMonthDayNanoType::make_value(month, day, nanosec), ))); } else if i % 3 == 1 && i % 4 != 0 { - let day = rng.gen_range(0..=5000); + let day = rng.gen_range(0..5000); let millisec = rng.gen_range(0..86_400_000); intervals.push(ScalarValue::IntervalDayTime(Some( IntervalDayTimeType::make_value(day, millisec), ))) } else { - let year = rng.gen_range(0..=20); - let month = rng.gen_range(0..=50); + let year = rng.gen_range(0..20); + let month = rng.gen_range(0..50); intervals.push(ScalarValue::IntervalYearMonth(Some( IntervalYearMonthType::make_value(year, month), ))) @@ -5224,24 +5207,24 @@ mod tests { let mut rng = rand::thread_rng(); for i in 0..vector_size { if i % 4 == 0 { - let days = rng.gen_range(0..=1000); + let days = rng.gen_range(0..1000); intervals.push(ScalarValue::IntervalDayTime(Some( IntervalDayTimeType::make_value(days, 0), ))) } else if i % 4 == 1 { - let days = rng.gen_range(0..=1000); + let days = rng.gen_range(0..1000); let millis = rng.gen_range(0..=86_400_000); intervals.push(ScalarValue::IntervalDayTime(Some( IntervalDayTimeType::make_value(days, millis), ))) } else if i % 4 == 2 { - let days = rng.gen_range(0..=1000); + let days = rng.gen_range(0..1000); let millis = rng.gen_range(0..=86_400_000); intervals.push(ScalarValue::IntervalDayTime(Some( IntervalDayTimeType::make_value(days, millis), ))) } else { - let days = rng.gen_range(0..=1000); + let days = rng.gen_range(0..1000); let nanosecs = rng.gen_range(1..86_400_000_000_000); intervals.push(ScalarValue::IntervalMonthDayNano(Some( IntervalMonthDayNanoType::make_value(0, days, nanosecs), From 011933f141905a26d2100ec12bb86f8d2ada96f0 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Wed, 8 Mar 2023 19:16:29 +0300 Subject: [PATCH 06/23] operator check --- datafusion/common/src/scalar.rs | 53 ++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index c4a3e73d8728..f136785dad6b 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -508,26 +508,57 @@ macro_rules! impl_op { ( ScalarValue::TimestampNanosecond(Some(ts_lhs), tz_lhs), ScalarValue::TimestampNanosecond(Some(ts_rhs), tz_rhs), - ) => Ok(ts_nanosec_sub_to_interval( - &ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs, - )?), + ) => match get_sign!($OPERATION) { + -1 => Ok(ts_nanosec_sub_to_interval( + &ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs, + )?), + _ => Err(DataFusionError::Internal(format!( + "Operator {} is not implemented for types {:?} and {:?}", + stringify!($OPERATION), + $LHS, + $RHS + ))), + }, ( ScalarValue::TimestampMicrosecond(Some(ts_lhs), tz_lhs), ScalarValue::TimestampMicrosecond(Some(ts_rhs), tz_rhs), - ) => Ok(ts_microsec_sub_to_interval( - &ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs, - )?), + ) => match get_sign!($OPERATION) { + -1 => Ok(ts_microsec_sub_to_interval( + &ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs, + )?), + _ => Err(DataFusionError::Internal(format!( + "Operator {} is not implemented for types {:?} and {:?}", + stringify!($OPERATION), + $LHS, + $RHS + ))), + }, ( ScalarValue::TimestampMillisecond(Some(ts_lhs), tz_lhs), ScalarValue::TimestampMillisecond(Some(ts_rhs), tz_rhs), - ) => Ok(ts_millisec_sub_to_interval( - &ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs, - )?), + ) => match get_sign!($OPERATION) { + -1 => Ok(ts_millisec_sub_to_interval( + &ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs, + )?), + _ => Err(DataFusionError::Internal(format!( + "Operator {} is not implemented for types {:?} and {:?}", + stringify!($OPERATION), + $LHS, + $RHS + ))), + }, ( ScalarValue::TimestampSecond(Some(ts_lhs), tz_lhs), ScalarValue::TimestampSecond(Some(ts_rhs), tz_rhs), - ) => Ok(ts_sec_sub_to_interval(&ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs)?), - + ) => match get_sign!($OPERATION) { + -1 => Ok(ts_sec_sub_to_interval(&ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs)?), + _ => Err(DataFusionError::Internal(format!( + "Operator {} is not implemented for types {:?} and {:?}", + stringify!($OPERATION), + $LHS, + $RHS + ))), + }, // Binary operations on arguments with different types: (ScalarValue::Date32(Some(days)), _) => { let value = date32_add(*days, $RHS, get_sign!($OPERATION))?; From e475f587dc9875643e6a398c00878714bbd105d2 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Thu, 9 Mar 2023 13:10:41 +0300 Subject: [PATCH 07/23] flag is removed --- datafusion/common/src/scalar.rs | 369 +++++++++----------------------- 1 file changed, 106 insertions(+), 263 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index f136785dad6b..e6f66e15e756 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -611,7 +611,7 @@ fn ts_microsec_sub_to_interval( lhs_tz: &Option, rhs_tz: &Option, ) -> Result { - let err_msg = "Overflow while conversion to microseconds"; + let err_msg = "Overflow while conversion from microsecond to nanoseconds"; let err = || DataFusionError::Execution(err_msg.to_string()); let lhs_ns = lhs_ts.checked_mul(1_000).ok_or_else(err)?; let rhs_ns = rhs_ts.checked_mul(1_000).ok_or_else(err)?; @@ -624,7 +624,7 @@ fn ts_millisec_sub_to_interval( lhs_tz: &Option, rhs_tz: &Option, ) -> Result { - let err_msg = "Overflow while conversion to microseconds"; + let err_msg = "Overflow while conversion from millisecond to nanoseconds"; let err = || DataFusionError::Execution(err_msg.to_string()); let lhs_ns = lhs_ts.checked_mul(1_000_000).ok_or_else(err)?; let rhs_ns = rhs_ts.checked_mul(1_000_000).ok_or_else(err)?; @@ -637,38 +637,28 @@ fn ts_sec_sub_to_interval( lhs_tz: &Option, rhs_tz: &Option, ) -> Result { - let err_msg = "Overflow while conversion to microseconds"; + let err_msg = "Overflow while conversion from second to nanoseconds"; let err = || DataFusionError::Execution(err_msg.to_string()); let lhs_ns = lhs_ts.checked_mul(1_000_000_000).ok_or_else(err)?; let rhs_ns = rhs_ts.checked_mul(1_000_000_000).ok_or_else(err)?; ts_nanosec_sub_to_interval(&lhs_ns, &rhs_ns, lhs_tz, rhs_tz) } -// This function will be removed once the result format is clarified. -fn ts_nanosec_sub_to_interval( - lhs_ts: &i64, - rhs_ts: &i64, - lhs_tz: &Option, - rhs_tz: &Option, -) -> Result { - let round_up_to_month = false; - - if round_up_to_month { - ts_nanosec_sub_to_interval_months(lhs_ts, rhs_ts, lhs_tz, rhs_tz) - } else { - ts_nanosec_sub_to_interval_days(lhs_ts, rhs_ts, lhs_tz, rhs_tz) - } -} - // Nanosecond-scale timestamps are subtracted to result in the narrowest interval variant. // Interval variants are always consist of the same signed parts to handle comparison operations more wisely. // For example, lhs < rhs => Interval(-, -, -), lhs > rhs => Interval(+, +, +) -fn ts_nanosec_sub_to_interval_months( +// In month-day-nano format, month bits are always 0, the result is shown in days as the largest scale. +fn ts_nanosec_sub_to_interval( lhs_ts: &i64, rhs_ts: &i64, lhs_tz: &Option, rhs_tz: &Option, ) -> Result { + let err = || { + DataFusionError::Execution(String::from( + "nanosec overflow in timestamp subtractÅŸon", + )) + }; // Conversion of integer and string-typed timestamps to NaiveDateTime objects // Timezone offsets are added also if applicable. let (naive_date_time2_unchecked, naive_date_time1_unchecked) = @@ -678,100 +668,45 @@ fn ts_nanosec_sub_to_interval_months( integer_to_naive_datetime(lhs_ts, rhs_ts)? }; - // Check whether we will find a negative interval or not + // Check whether we will find a negative interval or not. let (naive_date_time2, naive_date_time1, sign) = find_interval_sign(naive_date_time2_unchecked, naive_date_time1_unchecked); // Subtraction of datetimes. Details are inside the function. - let (mut months, mut months_residual) = + let (months, months_residual) = datetime_month_sub_with_rem(naive_date_time2, naive_date_time1)?; - - let err = - || DataFusionError::Execution(String::from("months_residual nanosec overflow")); - // Check whether we can return an IntervalYearMonth variant without losing information - let value = months_residual.num_nanoseconds().ok_or_else(err)?; - if value == 0 { + // Check whether we can return an IntervalYearMonth variant without losing information. + let months_residual_in_ns = months_residual.num_nanoseconds().ok_or_else(err)?; + if months_residual_in_ns == 0 { return Ok(ScalarValue::IntervalYearMonth(Some(sign * months))); } - // If months_residual is negative, take one month from months and - // add it to months_residual to make it positive. - // To ensure the difference is positive all the time, we take the days - // of previous datetime's month. - if months_residual.num_nanoseconds() < Some(0) { - (months, months_residual) = - normalize_duration(&months, &months_residual, naive_date_time1)?; - } - // Check whether we can return an IntervalDayTime variant without losing information - let months_residual_in_ns = months_residual.num_nanoseconds().ok_or_else(err)?; + let delta_secs = naive_date_time2.signed_duration_since(naive_date_time1); if months_residual_in_ns % 1_000_000 == 0 { - let delta_secs = naive_date_time2 - .signed_duration_since(naive_date_time1) - .num_milliseconds(); // 60 * 60 * 24 * 1000 = 86_400_000, number of millisecs in a day + let as_millisec = delta_secs.num_milliseconds(); return Ok(ScalarValue::IntervalDayTime(Some( IntervalDayTimeType::make_value( - sign * (delta_secs / 86_400_000) as i32, - sign * (delta_secs % 86_400_000) as i32, + sign * (as_millisec / 86_400_000) as i32, + sign * (as_millisec % 86_400_000) as i32, ), ))); - } - - // 60 * 60 * 24 * 1000 * 1000 * 1000 = 86_400_000_000_000, number of nanosecs in a day - Ok(ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value( - sign * months, - sign * (months_residual_in_ns / 86_400_000_000_000) as i32, - sign as i64 * (months_residual_in_ns % 86_400_000_000_000), - ), - ))) -} - -// Nanosecond-scale timestamps are subtracted to result in the narrowest interval variant. -// Interval variants are always consist of the same signed parts to handle comparison operations more wisely. -// For example, lhs < rhs => Interval(-, -, -), lhs > rhs => Interval(+, +, +) -fn ts_nanosec_sub_to_interval_days( - lhs_ts: &i64, - rhs_ts: &i64, - lhs_tz: &Option, - rhs_tz: &Option, -) -> Result { - // Conversion of integer and string-typed timestamps to NaiveDateTime objects - // Timezone offsets are added also if applicable. - let (naive_date_time2_unchecked, naive_date_time1_unchecked) = - if let (Some(l), Some(r)) = (lhs_tz, rhs_tz) { - integer_w_timezone_to_naive_datetime(lhs_ts, rhs_ts, l, r)? - } else { - integer_to_naive_datetime(lhs_ts, rhs_ts)? - }; - - // Check whether we will find a negative interval or not - let (naive_date_time2, naive_date_time1, sign) = - find_interval_sign(naive_date_time2_unchecked, naive_date_time1_unchecked); - - // Subtraction of datetimes. Details are inside the function. - let duration_in_nanosec = datetime_day_sub(naive_date_time2, naive_date_time1)?; - - // Try to return in IntervalDayTime - if duration_in_nanosec % 1_000_000 == 0 { - return Ok(ScalarValue::IntervalDayTime(Some( - IntervalDayTimeType::make_value( - sign * (duration_in_nanosec / 86_400_000_000_000) as i32, - sign * ((duration_in_nanosec / 1_000_000) % 86_400_000) as i32, + } else { + // 60 * 60 * 24 * 1000 * 1000 * 1000 = 86_400_000_000_000, number of nanosecs in a day + // To show similar behaviour with Postgre, we do not use month field, and collect + // months in the day field. + let as_nanosec = delta_secs.num_nanoseconds().ok_or_else(err)?; + Ok(ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value( + 0, + sign * (as_nanosec / 86_400_000_000_000) as i32, + sign as i64 * (as_nanosec % 86_400_000_000_000), ), - ))); + ))) } - - // The last option IntervalMonthDayNano - Ok(ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value( - 0, - sign * (duration_in_nanosec / 86_400_000_000_000) as i32, - sign as i64 * (duration_in_nanosec % 86_400_000_000_000), - ), - ))) } + #[inline] fn integer_to_naive_datetime( lhs_ts_ns: &i64, @@ -850,8 +785,7 @@ fn datetime_month_sub_with_rem( date_time2: NaiveDateTime, date_time1: NaiveDateTime, ) -> Result<(i32, Duration), DataFusionError> { - // The difference of total months. Since this operation ignores the days of dates, - // that month count may be decreased by 1 in case of negative day count. + // The difference of total months. let months = (date_time2.year() - date_time1.year()) * 12 + (date_time2.month() as i32 - date_time1.month() as i32); @@ -869,63 +803,6 @@ fn datetime_month_sub_with_rem( Ok((months, months_residual)) } -#[inline] -// This function assumes 'date_time2' is greater than 'date_time1', -// therefore; the result cannot be negative. -fn datetime_day_sub( - date_time2: NaiveDateTime, - date_time1: NaiveDateTime, -) -> Result { - // We directly take the difference of datetimes in nanosecond precision. - date_time2 - .signed_duration_since(date_time1) - .num_nanoseconds() - .ok_or(DataFusionError::Execution(String::from( - "datetime subtraction overflow", - ))) -} -#[inline] -fn normalize_duration( - months: &i32, - months_residual: &Duration, - at_month: NaiveDateTime, -) -> Result<(i32, Duration), DataFusionError> { - // For example, if the previous datetime's month and date is (Feb, 15), - // when we add the days of that month to month_residual - // variable, we need to add the february's day count. - // To ensure the difference is positive all the time, we take the days - // of previous datetime's month. - let added_days = - &Duration::days(days_in_month(at_month.year(), at_month.month())?.into()); - let months_residual_new = match months_residual.checked_add(added_days) { - Some(value) => value, - None => { - return Err(DataFusionError::Execution(format!( - "normalize duration error, cannot add {added_days:?} days to {months_residual:?}", - ))) - } - }; - let months_new = months - 1; - Ok((months_new, months_residual_new)) -} -#[inline] -// It gives the day count of the corresponding month at that year. -fn days_in_month(year: i32, month: u32) -> Result { - if let Some(first_day) = NaiveDate::from_ymd_opt(year, month, 1) { - let last_day = match first_day.with_month(month + 1) { - Some(day) => day, - None => NaiveDate::from_ymd_opt(year + 1, 1, 1).ok_or_else(|| { - DataFusionError::Execution(format!("out of range year: 1+{year}")) - })?, - }; - if let Some(days) = last_day.pred_opt() { - return Ok(days.day()); - } - } - Err(DataFusionError::Execution(format!( - "invalid date parameters, year: {year:?} & month: {month:?}", - ))) -} #[inline] pub fn date32_add(days: i32, scalar: &ScalarValue, sign: i32) -> Result { @@ -4814,15 +4691,11 @@ mod tests { #[test] fn timestamp_op_tests() { - let round_up_to_month = false; // positive interval, edge cases let vec_timestamps_next = timestamps_next(); let vec_timestamps_prev = timestamps_prev(); - let expected_results = if round_up_to_month { - expected_results_months(1) - } else { - expected_results_days(1) - }; + let expected_results = get_expected_results(1); + for (idx, exp) in expected_results.iter().enumerate() { assert_eq!( *exp, @@ -4835,11 +4708,7 @@ mod tests { // negative interval, edge cases let vec_timestamps_next = timestamps_prev(); let vec_timestamps_prev = timestamps_next(); - let expected_results = if round_up_to_month { - expected_results_months(-1) - } else { - expected_results_days(-1) - }; + let expected_results = get_expected_results(-1); for (idx, exp) in expected_results.iter().enumerate() { assert_eq!( *exp, @@ -4851,16 +4720,11 @@ mod tests { } #[test] fn timestamp_op_random_tests() { - let round_up_to_month = false; // timestamp1 + (or -) interval = timestamp2 // timestamp2 - timestamp1 (or timestamp1 - timestamp2) = interval ? - let sample_size = 100000; - let timestamps1 = get_random_timestamps1(sample_size); - let intervals = if round_up_to_month { - get_random_intervals_months(sample_size) - } else { - get_random_intervals_days(sample_size) - }; + let sample_size = 10000000; + let timestamps1 = get_random_timestamps(sample_size); + let intervals = get_random_intervals(sample_size); // ts(sec) + interval(ns) = ts(sec); however, // ts(sec) - ts(sec) cannot be = interval(ns). Therefore, // timestamps are more precise than intervals in tests. @@ -4868,10 +4732,22 @@ mod tests { for (idx, ts1) in timestamps1.iter().enumerate() { if idx % 2 == 0 { timestamp2 = ts1.add(intervals[idx].clone()).unwrap(); - assert_eq!(intervals[idx], timestamp2.sub(ts1).unwrap()); + assert_eq!( + intervals[idx], + timestamp2.sub(ts1).unwrap(), + "operands: {:?} (-) {:?}", + ts1.add(intervals[idx].clone()).unwrap(), + ts1 + ); } else { timestamp2 = ts1.sub(intervals[idx].clone()).unwrap(); - assert_eq!(intervals[idx], ts1.sub(timestamp2).unwrap()); + assert_eq!( + intervals[idx], + ts1.sub(timestamp2).unwrap(), + "operands: {:?} (-) {:?}", + ts1, + ts1.sub(intervals[idx].clone()).unwrap() + ); }; } } @@ -5066,7 +4942,7 @@ mod tests { ] } - fn expected_results_months(sign: i32) -> Vec { + fn get_expected_results(sign: i32) -> Vec { vec![ ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 0))), ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( @@ -5086,10 +4962,10 @@ mod tests { sign * 250, ))), ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(sign * 2, 0, sign as i64 * 15_000), + IntervalMonthDayNanoType::make_value(0, sign * 59, sign as i64 * 15_000), )), ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(sign, sign, sign as i64 * 22), + IntervalMonthDayNanoType::make_value(0, sign * 29, sign as i64 * 22), )), ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( sign * 425, @@ -5101,43 +4977,8 @@ mod tests { ))), ] } - fn expected_results_days(sign: i32) -> Vec { - vec![ - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(0, 0))), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - sign * 59, - 0, - ))), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - sign * 59, - 0, - ))), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - sign * 59, - 0, - ))), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - sign * 59, - sign * 250, - ))), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(0, sign * 59, sign as i64 * 15_000), - )), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(0, sign * 29, sign as i64 * 22), - )), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - sign * 425, - sign * 86370000, - ))), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - sign * 15735, - 0, - ))), - ] - } - fn get_random_timestamps1(sample_size: u64) -> Vec { + fn get_random_timestamps(sample_size: u64) -> Vec { let vector_size = sample_size; let mut timestamp = vec![]; let mut rng = rand::thread_rng(); @@ -5160,7 +5001,11 @@ mod tests { None, )) } else if i % 4 == 1 { - let millisec = rng.gen_range(0..=999); + let rand = rng.gen_range(1..=999); + let millisec = if rand % 2 == 1 { rand } else { rand - 1 }; + // timestamps millisecs are always created with odd millisecs to prevent. + // such situations: timestamp(millisec) - interval(millisec) = timestamp(millisec) + // However, timestamp(millisec) - timestamp(millisec) = interval(month) timestamp.push(ScalarValue::TimestampMillisecond( Some( NaiveDate::from_ymd_opt(year, month, day) @@ -5172,7 +5017,7 @@ mod tests { None, )) } else if i % 4 == 2 { - let microsec = rng.gen_range(0..=999_999); + let microsec = rng.gen_range(1..=999_999); timestamp.push(ScalarValue::TimestampMicrosecond( Some( NaiveDate::from_ymd_opt(year, month, day) @@ -5184,7 +5029,8 @@ mod tests { None, )) } else if i % 4 == 3 { - let nanosec = rng.gen_range(0..=999_999_999); + let rand = rng.gen_range(1..=999_999_999); + let nanosec = if rand % 2 == 1 { rand } else { rand - 1 }; timestamp.push(ScalarValue::TimestampNanosecond( Some( NaiveDate::from_ymd_opt(year, month, day) @@ -5200,65 +5046,62 @@ mod tests { timestamp } - fn get_random_intervals_months(sample_size: u64) -> Vec { - let vector_size = sample_size; - let mut intervals = vec![]; - let mut rng = rand::thread_rng(); - for i in 0..vector_size { - if i % 3 == 2 && i % 4 == 3 { - let month = rng.gen_range(0..100); - // there is a complex test issue for the days 28(29). - // for example, if we have an expected interval 2 months 28(29) days, - // the subtractor finds it as 3 months if the previous timestamp - // is at february. - let day = rng.gen_range(0..=27); - let nanosec = rng.gen_range(0..86_400_000_000_000); - intervals.push(ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(month, day, nanosec), - ))); - } else if i % 3 == 1 && i % 4 != 0 { - let day = rng.gen_range(0..5000); - let millisec = rng.gen_range(0..86_400_000); - intervals.push(ScalarValue::IntervalDayTime(Some( - IntervalDayTimeType::make_value(day, millisec), - ))) - } else { - let year = rng.gen_range(0..20); - let month = rng.gen_range(0..50); - intervals.push(ScalarValue::IntervalYearMonth(Some( - IntervalYearMonthType::make_value(year, month), - ))) - } - } - intervals - } - fn get_random_intervals_days(sample_size: u64) -> Vec { + fn get_random_intervals(sample_size: u64) -> Vec { let vector_size = sample_size; let mut intervals = vec![]; let mut rng = rand::thread_rng(); for i in 0..vector_size { if i % 4 == 0 { - let days = rng.gen_range(0..1000); - intervals.push(ScalarValue::IntervalDayTime(Some( - IntervalDayTimeType::make_value(days, 0), - ))) + let days = rng.gen_range(1..1000); + // To have variatons like timestamp(sec) + IntervalYearMonth and + // timestamp(sec) + IntervalDayTimeType(without millisec, since timestamps(sec) + + // interval(millisec) => timestamp(sec), we cannot forecast the resulting type). + // such conditions are added. + if i % 8 == 0 + || (days % 28 != 0) + || (days % 29 != 0) + || (days % 30 != 0) + || (days % 31 != 0) + { + intervals.push(ScalarValue::IntervalYearMonth(Some( + IntervalYearMonthType::make_value( + rng.gen_range(0..10), + rng.gen_range(0..500), + ), + ))) + } else { + intervals.push(ScalarValue::IntervalDayTime(Some( + IntervalDayTimeType::make_value(days, 0), + ))) + } } else if i % 4 == 1 { - let days = rng.gen_range(0..1000); - let millis = rng.gen_range(0..=86_400_000); + // interval millisecs are always created with even millisecs. + let days = rng.gen_range(1..1000); + let rand = rng.gen_range(0..86_400_000); + let millisec = if rand % 2 == 0 { rand } else { rand - 1 }; intervals.push(ScalarValue::IntervalDayTime(Some( - IntervalDayTimeType::make_value(days, millis), + IntervalDayTimeType::make_value(days, millisec), ))) } else if i % 4 == 2 { - let days = rng.gen_range(0..1000); - let millis = rng.gen_range(0..=86_400_000); + let days = rng.gen_range(1..1000); + let millisec = rng.gen_range(0..86_400_000); intervals.push(ScalarValue::IntervalDayTime(Some( - IntervalDayTimeType::make_value(days, millis), + IntervalDayTimeType::make_value(days, millisec), ))) } else { let days = rng.gen_range(0..1000); - let nanosecs = rng.gen_range(1..86_400_000_000_000); + let rand = rng.gen_range(1..86_400_000_000_000); + let nanosec = if rand % 2 == 0 { rand } else { rand - 1 }; intervals.push(ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(0, days, nanosecs), + IntervalMonthDayNanoType::make_value( + 0, + days, + if nanosec % 1_000_000 == 0 { + nanosec - 1 + } else { + nanosec + }, + ), ))); } } From 423fb65714f4d3c407fa7870743a201578de8340 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Thu, 9 Mar 2023 14:38:47 +0300 Subject: [PATCH 08/23] clippy fix --- datafusion/common/src/scalar.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index e6f66e15e756..3a078cee3184 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -686,12 +686,12 @@ fn ts_nanosec_sub_to_interval( if months_residual_in_ns % 1_000_000 == 0 { // 60 * 60 * 24 * 1000 = 86_400_000, number of millisecs in a day let as_millisec = delta_secs.num_milliseconds(); - return Ok(ScalarValue::IntervalDayTime(Some( + Ok(ScalarValue::IntervalDayTime(Some( IntervalDayTimeType::make_value( sign * (as_millisec / 86_400_000) as i32, sign * (as_millisec % 86_400_000) as i32, ), - ))); + ))) } else { // 60 * 60 * 24 * 1000 * 1000 * 1000 = 86_400_000_000_000, number of nanosecs in a day // To show similar behaviour with Postgre, we do not use month field, and collect From 1291758c0ac6db07d0fca4bfc0536229c7f4f5d4 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Thu, 9 Mar 2023 14:41:00 +0300 Subject: [PATCH 09/23] toml conflict --- datafusion/common/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index fa0d0c71a60c..6a546f3fd70e 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -47,5 +47,5 @@ num_cpus = "1.13.0" object_store = { version = "0.5.4", default-features = false, optional = true } parquet = { version = "34.0.0", default-features = false, optional = true } pyo3 = { version = "0.18.0", optional = true } +sqlparser = "0.32" rand = "0.8.4" -sqlparser = "0.30" From d7f3696ee436ce154a76cecd7c28d4fa06e04f2d Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Thu, 9 Mar 2023 16:43:01 +0300 Subject: [PATCH 10/23] minor changes --- datafusion/common/Cargo.toml | 2 +- datafusion/common/src/scalar.rs | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 554de6fa2efa..aa38ebd0e46c 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -47,6 +47,6 @@ num_cpus = "1.13.0" object_store = { version = "0.5.4", default-features = false, optional = true } parquet = { version = "34.0.0", default-features = false, optional = true } pyo3 = { version = "0.18.0", optional = true } -sqlparser = "0.32" rand = "0.8.4" +sqlparser = "0.32" diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index e848b79f66a2..acbaa1ec2b2d 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -672,7 +672,7 @@ fn ts_nanosec_sub_to_interval( ) -> Result { let err = || { DataFusionError::Execution(String::from( - "nanosec overflow in timestamp subtractÅŸon", + "nanosec overflow in timestamp subtraction", )) }; // Conversion of integer and string-typed timestamps to NaiveDateTime objects @@ -4784,25 +4784,24 @@ mod tests { // ts(sec) + interval(ns) = ts(sec); however, // ts(sec) - ts(sec) cannot be = interval(ns). Therefore, // timestamps are more precise than intervals in tests. - let mut timestamp2: ScalarValue; for (idx, ts1) in timestamps1.iter().enumerate() { if idx % 2 == 0 { - timestamp2 = ts1.add(intervals[idx].clone()).unwrap(); + let timestamp2 = ts1.add(intervals[idx].clone()).unwrap(); assert_eq!( intervals[idx], timestamp2.sub(ts1).unwrap(), "operands: {:?} (-) {:?}", - ts1.add(intervals[idx].clone()).unwrap(), + timestamp2, ts1 ); } else { - timestamp2 = ts1.sub(intervals[idx].clone()).unwrap(); + let timestamp2 = ts1.sub(intervals[idx].clone()).unwrap(); assert_eq!( intervals[idx], - ts1.sub(timestamp2).unwrap(), + ts1.sub(timestamp2.clone()).unwrap(), "operands: {:?} (-) {:?}", ts1, - ts1.sub(intervals[idx].clone()).unwrap() + timestamp2 ); }; } From 8d5c8e3eed1f89cb9e0718592e04285531803c3d Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Sat, 11 Mar 2023 19:47:51 +0300 Subject: [PATCH 11/23] deterministic matches --- datafusion/common/src/scalar.rs | 393 ++++++++++++++------------------ 1 file changed, 171 insertions(+), 222 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index acbaa1ec2b2d..10107306a614 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -43,9 +43,7 @@ use arrow::{ DECIMAL128_MAX_PRECISION, }, }; -use chrono::{ - DateTime, Datelike, Duration, FixedOffset, NaiveDate, NaiveDateTime, Timelike, -}; +use chrono::{DateTime, Datelike, Duration, FixedOffset, NaiveDate, NaiveDateTime}; /// Represents a dynamically typed, nullable single value. /// This is the single-valued counter-part to arrow's [`Array`]. @@ -506,54 +504,78 @@ macro_rules! impl_op { primitive_op!(lhs, rhs, Int8, $OPERATION) } ( - ScalarValue::TimestampNanosecond(Some(ts_lhs), tz_lhs), - ScalarValue::TimestampNanosecond(Some(ts_rhs), tz_rhs), + ScalarValue::TimestampSecond(Some(ts_lhs), tz_lhs), + ScalarValue::TimestampSecond(Some(ts_rhs), tz_rhs), ) => match get_sign!($OPERATION) { - -1 => Ok(ts_nanosec_sub_to_interval( - &ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs, - )?), + -1 => { + let err = || { + DataFusionError::Execution( + "Overflow while conversion from second to millisecond" + .to_string(), + ) + }; + Ok(ts_sub_to_interval( + &ts_lhs.checked_mul(1_000).ok_or_else(err)?, + &ts_rhs.checked_mul(1_000).ok_or_else(err)?, + &tz_lhs, + &tz_rhs, + 1, + )?) + } _ => Err(DataFusionError::Internal(format!( - "Operator {} is not implemented for types {:?} and {:?}", + "Operator {} is not implemented for {:?} and {:?}", stringify!($OPERATION), $LHS, $RHS ))), }, ( - ScalarValue::TimestampMicrosecond(Some(ts_lhs), tz_lhs), - ScalarValue::TimestampMicrosecond(Some(ts_rhs), tz_rhs), + ScalarValue::TimestampMillisecond(Some(ts_lhs), tz_lhs), + ScalarValue::TimestampMillisecond(Some(ts_rhs), tz_rhs), ) => match get_sign!($OPERATION) { - -1 => Ok(ts_microsec_sub_to_interval( - &ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs, - )?), + -1 => Ok(ts_sub_to_interval(&ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs, 1)?), _ => Err(DataFusionError::Internal(format!( - "Operator {} is not implemented for types {:?} and {:?}", + "Operator {} is not implemented for {:?} and {:?}", stringify!($OPERATION), $LHS, $RHS ))), }, ( - ScalarValue::TimestampMillisecond(Some(ts_lhs), tz_lhs), - ScalarValue::TimestampMillisecond(Some(ts_rhs), tz_rhs), + ScalarValue::TimestampMicrosecond(Some(ts_lhs), tz_lhs), + ScalarValue::TimestampMicrosecond(Some(ts_rhs), tz_rhs), ) => match get_sign!($OPERATION) { - -1 => Ok(ts_millisec_sub_to_interval( - &ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs, - )?), + -1 => { + let err = || { + DataFusionError::Execution( + "Overflow while conversion from microsecond to nanosecond" + .to_string(), + ) + }; + Ok(ts_sub_to_interval( + &ts_lhs.checked_mul(1_000).ok_or_else(err)?, + &ts_rhs.checked_mul(1_000).ok_or_else(err)?, + &tz_lhs, + &tz_rhs, + 1_000_000, + )?) + } _ => Err(DataFusionError::Internal(format!( - "Operator {} is not implemented for types {:?} and {:?}", + "Operator {} is not implemented for {:?} and {:?}", stringify!($OPERATION), $LHS, $RHS ))), }, ( - ScalarValue::TimestampSecond(Some(ts_lhs), tz_lhs), - ScalarValue::TimestampSecond(Some(ts_rhs), tz_rhs), + ScalarValue::TimestampNanosecond(Some(ts_lhs), tz_lhs), + ScalarValue::TimestampNanosecond(Some(ts_rhs), tz_rhs), ) => match get_sign!($OPERATION) { - -1 => Ok(ts_sec_sub_to_interval(&ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs)?), + -1 => Ok(ts_sub_to_interval( + &ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs, 1_000_000, + )?), _ => Err(DataFusionError::Internal(format!( - "Operator {} is not implemented for types {:?} and {:?}", + "Operator {} is not implemented for {:?} and {:?}", stringify!($OPERATION), $LHS, $RHS @@ -619,146 +641,126 @@ macro_rules! get_sign { }; } -// all timestamp variants are converted to nanosecond scale -#[inline] -fn ts_microsec_sub_to_interval( - lhs_ts: &i64, - rhs_ts: &i64, - lhs_tz: &Option, - rhs_tz: &Option, -) -> Result { - let err_msg = "Overflow while conversion from microsecond to nanoseconds"; - let err = || DataFusionError::Execution(err_msg.to_string()); - let lhs_ns = lhs_ts.checked_mul(1_000).ok_or_else(err)?; - let rhs_ns = rhs_ts.checked_mul(1_000).ok_or_else(err)?; - ts_nanosec_sub_to_interval(&lhs_ns, &rhs_ns, lhs_tz, rhs_tz) -} -#[inline] -fn ts_millisec_sub_to_interval( +// Timestamp(sec) and Timestamp(millisec) difference is resulting as Interval(days, millis) +// Timestamp(microsec) and Tiemstamp(nanosec) difference is resulting as Interval(days, nanos) +fn ts_sub_to_interval( lhs_ts: &i64, rhs_ts: &i64, lhs_tz: &Option, rhs_tz: &Option, + scale_factor: i32, ) -> Result { - let err_msg = "Overflow while conversion from millisecond to nanoseconds"; - let err = || DataFusionError::Execution(err_msg.to_string()); - let lhs_ns = lhs_ts.checked_mul(1_000_000).ok_or_else(err)?; - let rhs_ns = rhs_ts.checked_mul(1_000_000).ok_or_else(err)?; - ts_nanosec_sub_to_interval(&lhs_ns, &rhs_ns, lhs_tz, rhs_tz) -} -#[inline] -fn ts_sec_sub_to_interval( - lhs_ts: &i64, - rhs_ts: &i64, - lhs_tz: &Option, - rhs_tz: &Option, -) -> Result { - let err_msg = "Overflow while conversion from second to nanoseconds"; - let err = || DataFusionError::Execution(err_msg.to_string()); - let lhs_ns = lhs_ts.checked_mul(1_000_000_000).ok_or_else(err)?; - let rhs_ns = rhs_ts.checked_mul(1_000_000_000).ok_or_else(err)?; - ts_nanosec_sub_to_interval(&lhs_ns, &rhs_ns, lhs_tz, rhs_tz) -} - -// Nanosecond-scale timestamps are subtracted to result in the narrowest interval variant. -// Interval variants are always consist of the same signed parts to handle comparison operations more wisely. -// For example, lhs < rhs => Interval(-, -, -), lhs > rhs => Interval(+, +, +) -// In month-day-nano format, month bits are always 0, the result is shown in days as the largest scale. -fn ts_nanosec_sub_to_interval( - lhs_ts: &i64, - rhs_ts: &i64, - lhs_tz: &Option, - rhs_tz: &Option, -) -> Result { - let err = || { - DataFusionError::Execution(String::from( - "nanosec overflow in timestamp subtraction", - )) - }; // Conversion of integer and string-typed timestamps to NaiveDateTime objects // Timezone offsets are added also if applicable. let (naive_date_time2_unchecked, naive_date_time1_unchecked) = - if let (Some(l), Some(r)) = (lhs_tz, rhs_tz) { - integer_w_timezone_to_naive_datetime(lhs_ts, rhs_ts, l, r)? - } else { - integer_to_naive_datetime(lhs_ts, rhs_ts)? - }; + with_timezone_to_naive_datetime(lhs_ts, rhs_ts, lhs_tz, rhs_tz, &scale_factor)?; // Check whether we will find a negative interval or not. let (naive_date_time2, naive_date_time1, sign) = find_interval_sign(naive_date_time2_unchecked, naive_date_time1_unchecked); - // Subtraction of datetimes. Details are inside the function. - let (months, months_residual) = - datetime_month_sub_with_rem(naive_date_time2, naive_date_time1)?; - // Check whether we can return an IntervalYearMonth variant without losing information. - let months_residual_in_ns = months_residual.num_nanoseconds().ok_or_else(err)?; - if months_residual_in_ns == 0 { - return Ok(ScalarValue::IntervalYearMonth(Some(sign * months))); - } - - // Check whether we can return an IntervalDayTime variant without losing information let delta_secs = naive_date_time2.signed_duration_since(naive_date_time1); - if months_residual_in_ns % 1_000_000 == 0 { + + match scale_factor { // 60 * 60 * 24 * 1000 = 86_400_000, number of millisecs in a day - let as_millisec = delta_secs.num_milliseconds(); - Ok(ScalarValue::IntervalDayTime(Some( - IntervalDayTimeType::make_value( - sign * (as_millisec / 86_400_000) as i32, - sign * (as_millisec % 86_400_000) as i32, - ), - ))) - } else { - // 60 * 60 * 24 * 1000 * 1000 * 1000 = 86_400_000_000_000, number of nanosecs in a day - // To show similar behaviour with Postgre, we do not use month field, and collect - // months in the day field. - let as_nanosec = delta_secs.num_nanoseconds().ok_or_else(err)?; - Ok(ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value( - 0, - sign * (as_nanosec / 86_400_000_000_000) as i32, - sign as i64 * (as_nanosec % 86_400_000_000_000), - ), - ))) + 1 => { + let as_millisecs = delta_secs.num_milliseconds(); + Ok(ScalarValue::IntervalDayTime(Some( + IntervalDayTimeType::make_value( + sign * (as_millisecs / 86_400_000) as i32, + sign * (as_millisecs % 86_400_000) as i32, + ), + ))) + } + // 60 * 60 * 24 * 1000_000_000 = 86_400_000_000_000, number of nanosecs in a day + 1_000_000 => { + let as_nanosecs = delta_secs.num_nanoseconds().ok_or_else(|| { + DataFusionError::Execution(String::from( + "timestamp difference cannot be shown in nanosecond precision", + )) + })?; + Ok(ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value( + 0, + sign * (as_nanosecs / 86_400_000_000_000) as i32, + sign as i64 * (as_nanosecs % 86_400_000_000_000), + ), + ))) + } + _ => Err(DataFusionError::Execution(String::from( + "undefined scale factor", + ))), } } +#[inline] +fn with_timezone_to_naive_datetime( + lhs_ts: &i64, + rhs_ts: &i64, + lhs_tz: &Option, + rhs_tz: &Option, + scale_factor: &i32, +) -> Result<(NaiveDateTime, NaiveDateTime), DataFusionError> { + let (naive_lhs, naive_rhs) = match scale_factor { + 1 => ms_to_naive_datetime(lhs_ts, rhs_ts)?, + 1_000_000 => ns_to_naive_datetime(lhs_ts, rhs_ts)?, + _ => { + return Err(DataFusionError::Execution(String::from( + "undefined scale factor", + ))) + } + }; + match (lhs_tz, rhs_tz) { + (Some(l), Some(r)) => match (parse_tz_to_offset(l), parse_tz_to_offset(r)) { + (Ok(l), Ok(r)) => Ok(( + DateTime::::from_utc(naive_lhs, l).naive_local(), + DateTime::::from_utc(naive_rhs, r).naive_local(), + )), + (_, _) => Ok((naive_lhs, naive_rhs)), + }, + (_, _) => Ok((naive_lhs, naive_rhs)), + } +} #[inline] -fn integer_to_naive_datetime( - lhs_ts_ns: &i64, - rhs_ts_ns: &i64, +fn ms_to_naive_datetime( + lhs_ts_ms: &i64, + rhs_ts_ms: &i64, ) -> Result<(NaiveDateTime, NaiveDateTime), DataFusionError> { match ( NaiveDateTime::from_timestamp_opt( - lhs_ts_ns / 1_000_000_000, - (lhs_ts_ns % 1_000_000_000) as u32, + lhs_ts_ms / 1_000, + (lhs_ts_ms % 1_000) as u32 * 1_000_000, ), NaiveDateTime::from_timestamp_opt( - rhs_ts_ns / 1_000_000_000, - (rhs_ts_ns % 1_000_000_000) as u32, + rhs_ts_ms / 1_000, + (rhs_ts_ms % 1_000) as u32 * 1_000_000, ), ) { (Some(x), Some(y)) => Ok((x, y)), (x, y) => Err(DataFusionError::Execution(format!( - "timestamps {x:?} or {y:?} cannot be converted to datetimes", + "timestamps {x:?} or {y:?} cannot be converted to NaiveDateTime", ))), } } #[inline] -fn integer_w_timezone_to_naive_datetime( +fn ns_to_naive_datetime( lhs_ts_ns: &i64, rhs_ts_ns: &i64, - lhs_tz: &str, - rhs_tz: &str, ) -> Result<(NaiveDateTime, NaiveDateTime), DataFusionError> { - let (naive_lhs, naive_rhs) = integer_to_naive_datetime(lhs_ts_ns, rhs_ts_ns)?; - - match (parse_tz_to_offset(lhs_tz), parse_tz_to_offset(rhs_tz)) { - (Ok(l), Ok(r)) => Ok(( - DateTime::::from_utc(naive_lhs, l).naive_local(), - DateTime::::from_utc(naive_rhs, r).naive_local(), - )), - (_, _) => Ok((naive_lhs, naive_rhs)), + match ( + NaiveDateTime::from_timestamp_opt( + lhs_ts_ns / 1_000_000_000, + (lhs_ts_ns % 1_000_000_000) as u32, + ), + NaiveDateTime::from_timestamp_opt( + rhs_ts_ns / 1_000_000_000, + (rhs_ts_ns % 1_000_000_000) as u32, + ), + ) { + (Some(x), Some(y)) => Ok((x, y)), + (x, y) => Err(DataFusionError::Execution(format!( + "timestamps {x:?} or {y:?} cannot be converted to NaiveDateTime", + ))), } } // This function parses as the format of "+HH:MM", for example, "+05:30" @@ -794,31 +796,6 @@ fn find_interval_sign( (ndt2, ndt1, 1) } } -// This function assumes 'date_time2' is greater than 'date_time1', -// therefore; resulting 'months' cannot be negative. -#[inline] -fn datetime_month_sub_with_rem( - date_time2: NaiveDateTime, - date_time1: NaiveDateTime, -) -> Result<(i32, Duration), DataFusionError> { - // The difference of total months. - let months = (date_time2.year() - date_time1.year()) * 12 - + (date_time2.month() as i32 - date_time1.month() as i32); - - // months_residual is in the form of X secs, Y nanosecs. - // Y cannot be larger than 1_000_000_000, it is rounded up to seconds. - // The subtractions may overflow, so cast i64. - let months_residual = - Duration::days(date_time2.day() as i64 - date_time1.day() as i64) - + Duration::hours(date_time2.hour() as i64 - date_time1.hour() as i64) - + Duration::minutes(date_time2.minute() as i64 - date_time1.minute() as i64) - + Duration::seconds(date_time2.second() as i64 - date_time1.second() as i64) - + Duration::nanoseconds( - date_time2.nanosecond() as i64 - date_time1.nanosecond() as i64, - ); - - Ok((months, months_residual)) -} #[inline] pub fn date32_add(days: i32, scalar: &ScalarValue, sign: i32) -> Result { @@ -4778,7 +4755,7 @@ mod tests { fn timestamp_op_random_tests() { // timestamp1 + (or -) interval = timestamp2 // timestamp2 - timestamp1 (or timestamp1 - timestamp2) = interval ? - let sample_size = 10000000; + let sample_size = 1000000; let timestamps1 = get_random_timestamps(sample_size); let intervals = get_random_intervals(sample_size); // ts(sec) + interval(ns) = ts(sec); however, @@ -4790,7 +4767,8 @@ mod tests { assert_eq!( intervals[idx], timestamp2.sub(ts1).unwrap(), - "operands: {:?} (-) {:?}", + "index:{}, operands: {:?} (-) {:?}", + idx, timestamp2, ts1 ); @@ -4799,7 +4777,8 @@ mod tests { assert_eq!( intervals[idx], ts1.sub(timestamp2.clone()).unwrap(), - "operands: {:?} (-) {:?}", + "index:{}, operands: {:?} (-) {:?}", + idx, ts1, timestamp2 ); @@ -4831,7 +4810,7 @@ mod tests { ), ScalarValue::TimestampMillisecond( Some( - NaiveDate::from_ymd_opt(2023, 3, 1) + NaiveDate::from_ymd_opt(2023, 2, 11) .unwrap() .and_hms_milli_opt(10, 10, 0, 000) .unwrap() @@ -4999,18 +4978,19 @@ mod tests { fn get_expected_results(sign: i32) -> Vec { vec![ - ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 0))), - ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( - 0, - sign * 2, - ))), - ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(0, 0, 0), + )), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(0, sign * 59, 0), + )), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + sign * 41, 0, - sign * 2, ))), - ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + sign * 59, 0, - sign * 2, ))), ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( sign * 59, @@ -5026,9 +5006,9 @@ mod tests { sign * 425, sign * 86370000, ))), - ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( - sign * 43, - sign, + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + sign * 15735, + 0, ))), ] } @@ -5040,7 +5020,7 @@ mod tests { for i in 0..vector_size { let year = rng.gen_range(1995..=2050); let month = rng.gen_range(1..=12); - let day = rng.gen_range(1..=28); + let day = rng.gen_range(1..=28); // to exclude invalid dates let hour = rng.gen_range(0..=23); let minute = rng.gen_range(0..=59); let second = rng.gen_range(0..=59); @@ -5056,11 +5036,7 @@ mod tests { None, )) } else if i % 4 == 1 { - let rand = rng.gen_range(1..=999); - let millisec = if rand % 2 == 1 { rand } else { rand - 1 }; - // timestamps millisecs are always created with odd millisecs to prevent. - // such situations: timestamp(millisec) - interval(millisec) = timestamp(millisec) - // However, timestamp(millisec) - timestamp(millisec) = interval(month) + let millisec = rng.gen_range(0..=999); timestamp.push(ScalarValue::TimestampMillisecond( Some( NaiveDate::from_ymd_opt(year, month, day) @@ -5072,7 +5048,7 @@ mod tests { None, )) } else if i % 4 == 2 { - let microsec = rng.gen_range(1..=999_999); + let microsec = rng.gen_range(0..=999_999); timestamp.push(ScalarValue::TimestampMicrosecond( Some( NaiveDate::from_ymd_opt(year, month, day) @@ -5084,8 +5060,7 @@ mod tests { None, )) } else if i % 4 == 3 { - let rand = rng.gen_range(1..=999_999_999); - let nanosec = if rand % 2 == 1 { rand } else { rand - 1 }; + let nanosec = rng.gen_range(0..=999_999_999); timestamp.push(ScalarValue::TimestampNanosecond( Some( NaiveDate::from_ymd_opt(year, month, day) @@ -5107,56 +5082,30 @@ mod tests { let mut rng = rand::thread_rng(); for i in 0..vector_size { if i % 4 == 0 { - let days = rng.gen_range(1..1000); - // To have variatons like timestamp(sec) + IntervalYearMonth and - // timestamp(sec) + IntervalDayTimeType(without millisec, since timestamps(sec) + - // interval(millisec) => timestamp(sec), we cannot forecast the resulting type). - // such conditions are added. - if i % 8 == 0 - || (days % 28 != 0) - || (days % 29 != 0) - || (days % 30 != 0) - || (days % 31 != 0) - { - intervals.push(ScalarValue::IntervalYearMonth(Some( - IntervalYearMonthType::make_value( - rng.gen_range(0..10), - rng.gen_range(0..500), - ), - ))) - } else { - intervals.push(ScalarValue::IntervalDayTime(Some( - IntervalDayTimeType::make_value(days, 0), - ))) - } - } else if i % 4 == 1 { - // interval millisecs are always created with even millisecs. - let days = rng.gen_range(1..1000); - let rand = rng.gen_range(0..86_400_000); - let millisec = if rand % 2 == 0 { rand } else { rand - 1 }; + let days = rng.gen_range(0..5000); + // to not break second precision + let millis = rng.gen_range(0..86_400) * 1000; intervals.push(ScalarValue::IntervalDayTime(Some( - IntervalDayTimeType::make_value(days, millisec), + IntervalDayTimeType::make_value(days, millis), ))) - } else if i % 4 == 2 { - let days = rng.gen_range(1..1000); + } else if i % 4 == 1 { + let days = rng.gen_range(0..5000); let millisec = rng.gen_range(0..86_400_000); intervals.push(ScalarValue::IntervalDayTime(Some( IntervalDayTimeType::make_value(days, millisec), ))) + } else if i % 4 == 2 { + let days = rng.gen_range(0..5000); + // to not break microsec precision + let nanosec = rng.gen_range(0..86_400_000_000) * 1000; + intervals.push(ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(0, days, nanosec), + ))) } else { - let days = rng.gen_range(0..1000); - let rand = rng.gen_range(1..86_400_000_000_000); - let nanosec = if rand % 2 == 0 { rand } else { rand - 1 }; + let days = rng.gen_range(0..5000); + let nanosec = rng.gen_range(0..86_400_000_000_000); intervals.push(ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value( - 0, - days, - if nanosec % 1_000_000 == 0 { - nanosec - 1 - } else { - nanosec - }, - ), + IntervalMonthDayNanoType::make_value(0, days, nanosec), ))); } } From 31577d95a59601cd4c4c19df1aa158fa32d5667e Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Sun, 12 Mar 2023 17:57:27 +0300 Subject: [PATCH 12/23] simplifications (clippy error) --- datafusion-cli/Cargo.lock | 68 ++++++++++++++++----------------- datafusion/common/src/scalar.rs | 34 +++++------------ 2 files changed, 44 insertions(+), 58 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 68ff002f7ad6..a5e40209fe1d 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -356,9 +356,9 @@ dependencies = [ [[package]] name = "block-buffer" -version = "0.10.3" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cce20737498f97b993470a6e536b8523f0af7892a4f928cceb1ac5e52ebe7e" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" dependencies = [ "generic-array", ] @@ -546,9 +546,9 @@ dependencies = [ [[package]] name = "constant_time_eq" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3ad85c1f65dc7b37604eb0e89748faf0b9653065f2a8ef69f96a687ec1e9279" +checksum = "13418e745008f7349ec7e449155f419a61b92b58a99cc3616942b926825ec76b" [[package]] name = "core-foundation-sys" @@ -1023,9 +1023,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13e2792b0ff0340399d58445b88fd9770e3489eff258a4cbc1523418f12abf84" +checksum = "531ac96c6ff5fd7c62263c5e3c67a603af4fcaee2e1a0ae5565ba3a11e69e549" dependencies = [ "futures-channel", "futures-core", @@ -1038,9 +1038,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e5317663a9089767a1ec00a487df42e0ca174b61b4483213ac24448e4664df5" +checksum = "164713a5a0dcc3e7b4b1ed7d3b433cabc18025386f9339346e8daf15963cf7ac" dependencies = [ "futures-core", "futures-sink", @@ -1048,15 +1048,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec90ff4d0fe1f57d600049061dc6bb68ed03c7d2fbd697274c41805dcb3f8608" +checksum = "86d7a0c1aa76363dac491de0ee99faf6941128376f1cf96f07db7603b7de69dd" [[package]] name = "futures-executor" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8de0a35a6ab97ec8869e32a2473f4b1324459e14c29275d14b10cb1fd19b50e" +checksum = "1997dd9df74cdac935c76252744c1ed5794fac083242ea4fe77ef3ed60ba0f83" dependencies = [ "futures-core", "futures-task", @@ -1065,15 +1065,15 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfb8371b6fb2aeb2d280374607aeabfc99d95c72edfe51692e42d3d7f0d08531" +checksum = "89d422fa3cbe3b40dca574ab087abb5bc98258ea57eea3fd6f1fa7162c778b91" [[package]] name = "futures-macro" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a73af87da33b5acf53acfebdc339fe592ecf5357ac7c0a7734ab9d8c876a70" +checksum = "3eb14ed937631bd8b8b8977f2c198443447a8355b6e3ca599f38c975e5a963b6" dependencies = [ "proc-macro2", "quote", @@ -1082,21 +1082,21 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f310820bb3e8cfd46c80db4d7fb8353e15dfff853a127158425f31e0be6c8364" +checksum = "ec93083a4aecafb2a80a885c9de1f0ccae9dbd32c2bb54b0c3a65690e0b8d2f2" [[package]] name = "futures-task" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf79a1bf610b10f42aea489289c5a2c478a786509693b80cd39c44ccd936366" +checksum = "fd65540d33b37b16542a0438c12e6aeead10d4ac5d05bd3f805b8f35ab592879" [[package]] name = "futures-util" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c1d6de3acfef38d2be4b1f543f553131788603495be83da675e180c8d6b7bd1" +checksum = "3ef6b17e481503ec85211fed8f39d1970f128935ca1f814cd32ac4a6842e84ab" dependencies = [ "futures-channel", "futures-core", @@ -1247,9 +1247,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.24" +version = "0.14.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e011372fa0b68db8350aa7a248930ecc7839bf46d8485577d69f117a75f164c" +checksum = "cc5e554ff619822309ffd57d8734d77cd5ce6238bc956f037ea06c58238c9899" dependencies = [ "bytes", "futures-channel", @@ -1462,9 +1462,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.139" +version = "0.2.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" +checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" [[package]] name = "libm" @@ -2172,18 +2172,18 @@ checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc" [[package]] name = "serde" -version = "1.0.153" +version = "1.0.155" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a382c72b4ba118526e187430bb4963cd6d55051ebf13d9b25574d379cc98d20" +checksum = "71f2b4817415c6d4210bfe1c7bfcf4801b2d904cb4d0e1a8fdb651013c9e86b8" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.153" +version = "1.0.155" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ef476a5790f0f6decbc66726b6e5d63680ed518283e64c7df415989d880954f" +checksum = "d071a94a3fac4aff69d023a7f411e33f40f3483f8c5190b1953822b6b76d7630" dependencies = [ "proc-macro2", "quote", @@ -2569,9 +2569,9 @@ checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "unicode-bidi" -version = "0.3.10" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54675592c1dbefd78cbd98db9bacd89886e1ca50692a0692baefffdeb92dd58" +checksum = "524b68aca1d05e03fdf03fcdce2c6c94b6daf6d16861ddaa7e4f2b6638a9052c" [[package]] name = "unicode-ident" @@ -2619,9 +2619,9 @@ dependencies = [ [[package]] name = "utf8parse" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936e4b492acfd135421d8dca4b1aa80a7bfc26e702ef3af710e0752684df5372" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 10107306a614..69b3e1841ce3 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -648,27 +648,24 @@ fn ts_sub_to_interval( rhs_ts: &i64, lhs_tz: &Option, rhs_tz: &Option, - scale_factor: i32, + scale_factor: i64, ) -> Result { // Conversion of integer and string-typed timestamps to NaiveDateTime objects // Timezone offsets are added also if applicable. - let (naive_date_time2_unchecked, naive_date_time1_unchecked) = + let (naive_date_time2, naive_date_time1) = with_timezone_to_naive_datetime(lhs_ts, rhs_ts, lhs_tz, rhs_tz, &scale_factor)?; - // Check whether we will find a negative interval or not. - let (naive_date_time2, naive_date_time1, sign) = - find_interval_sign(naive_date_time2_unchecked, naive_date_time1_unchecked); - let delta_secs = naive_date_time2.signed_duration_since(naive_date_time1); + // 60 * 60 * 24 * 1000 = 86_400_000, number of millisecs in a day + let number_of_millisecs_in_day: i64 = 86_400_000; match scale_factor { - // 60 * 60 * 24 * 1000 = 86_400_000, number of millisecs in a day 1 => { let as_millisecs = delta_secs.num_milliseconds(); Ok(ScalarValue::IntervalDayTime(Some( IntervalDayTimeType::make_value( - sign * (as_millisecs / 86_400_000) as i32, - sign * (as_millisecs % 86_400_000) as i32, + (as_millisecs / number_of_millisecs_in_day) as i32, + (as_millisecs % number_of_millisecs_in_day) as i32, ), ))) } @@ -682,8 +679,8 @@ fn ts_sub_to_interval( Ok(ScalarValue::IntervalMonthDayNano(Some( IntervalMonthDayNanoType::make_value( 0, - sign * (as_nanosecs / 86_400_000_000_000) as i32, - sign as i64 * (as_nanosecs % 86_400_000_000_000), + (as_nanosecs / (number_of_millisecs_in_day * scale_factor)) as i32, + as_nanosecs % (number_of_millisecs_in_day * scale_factor), ), ))) } @@ -698,7 +695,7 @@ fn with_timezone_to_naive_datetime( rhs_ts: &i64, lhs_tz: &Option, rhs_tz: &Option, - scale_factor: &i32, + scale_factor: &i64, ) -> Result<(NaiveDateTime, NaiveDateTime), DataFusionError> { let (naive_lhs, naive_rhs) = match scale_factor { 1 => ms_to_naive_datetime(lhs_ts, rhs_ts)?, @@ -785,17 +782,6 @@ fn parse_tz_to_offset(tz: &str) -> Result { }; Ok(timezone_offset) } -#[inline] -fn find_interval_sign( - ndt2: NaiveDateTime, - ndt1: NaiveDateTime, -) -> (NaiveDateTime, NaiveDateTime, i32) { - if ndt2.timestamp_nanos() < ndt1.timestamp_nanos() { - (ndt1, ndt2, -1) - } else { - (ndt2, ndt1, 1) - } -} #[inline] pub fn date32_add(days: i32, scalar: &ScalarValue, sign: i32) -> Result { @@ -4755,7 +4741,7 @@ mod tests { fn timestamp_op_random_tests() { // timestamp1 + (or -) interval = timestamp2 // timestamp2 - timestamp1 (or timestamp1 - timestamp2) = interval ? - let sample_size = 1000000; + let sample_size = 100000; let timestamps1 = get_random_timestamps(sample_size); let intervals = get_random_intervals(sample_size); // ts(sec) + interval(ns) = ts(sec); however, From c274aefb1d2484fa8aa50c0006d58187f1a1d4a9 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Mon, 13 Mar 2023 13:32:14 +0300 Subject: [PATCH 13/23] test format changed --- datafusion-cli/Cargo.lock | 16 +- datafusion/common/src/scalar.rs | 499 ++++++++++++++++---------------- 2 files changed, 265 insertions(+), 250 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index a5e40209fe1d..d253d1d90a97 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -440,9 +440,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.23" +version = "0.4.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f" +checksum = "4e3c5919066adf22df73762e50cffcde3a758f2a848b113b586d1f86728b673b" dependencies = [ "iana-time-zone", "num-integer", @@ -1889,9 +1889,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.51" +version = "1.0.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" +checksum = "1d0e1ae9e836cc3beddd63db0df682593d7e2d3d891ae8c9083d2113e1744224" dependencies = [ "unicode-ident", ] @@ -1908,9 +1908,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.23" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +checksum = "50686e0021c4136d1d453b2dfe059902278681512a34d4248435dc34b6b5c8ec" dependencies = [ "proc-macro2", ] @@ -2160,9 +2160,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a" +checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" [[package]] name = "seq-macro" diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 69b3e1841ce3..963e60555144 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -519,7 +519,7 @@ macro_rules! impl_op { &ts_rhs.checked_mul(1_000).ok_or_else(err)?, &tz_lhs, &tz_rhs, - 1, + IntervalMode::Milli, )?) } _ => Err(DataFusionError::Internal(format!( @@ -533,7 +533,13 @@ macro_rules! impl_op { ScalarValue::TimestampMillisecond(Some(ts_lhs), tz_lhs), ScalarValue::TimestampMillisecond(Some(ts_rhs), tz_rhs), ) => match get_sign!($OPERATION) { - -1 => Ok(ts_sub_to_interval(&ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs, 1)?), + -1 => Ok(ts_sub_to_interval( + &ts_lhs, + &ts_rhs, + &tz_lhs, + &tz_rhs, + IntervalMode::Milli, + )?), _ => Err(DataFusionError::Internal(format!( "Operator {} is not implemented for {:?} and {:?}", stringify!($OPERATION), @@ -557,7 +563,7 @@ macro_rules! impl_op { &ts_rhs.checked_mul(1_000).ok_or_else(err)?, &tz_lhs, &tz_rhs, - 1_000_000, + IntervalMode::Nano, )?) } _ => Err(DataFusionError::Internal(format!( @@ -572,7 +578,11 @@ macro_rules! impl_op { ScalarValue::TimestampNanosecond(Some(ts_rhs), tz_rhs), ) => match get_sign!($OPERATION) { -1 => Ok(ts_sub_to_interval( - &ts_lhs, &ts_rhs, &tz_lhs, &tz_rhs, 1_000_000, + &ts_lhs, + &ts_rhs, + &tz_lhs, + &tz_rhs, + IntervalMode::Nano, )?), _ => Err(DataFusionError::Internal(format!( "Operator {} is not implemented for {:?} and {:?}", @@ -641,6 +651,10 @@ macro_rules! get_sign { }; } +enum IntervalMode { + Milli, + Nano, +} // Timestamp(sec) and Timestamp(millisec) difference is resulting as Interval(days, millis) // Timestamp(microsec) and Tiemstamp(nanosec) difference is resulting as Interval(days, nanos) fn ts_sub_to_interval( @@ -648,19 +662,19 @@ fn ts_sub_to_interval( rhs_ts: &i64, lhs_tz: &Option, rhs_tz: &Option, - scale_factor: i64, + mode: IntervalMode, ) -> Result { // Conversion of integer and string-typed timestamps to NaiveDateTime objects // Timezone offsets are added also if applicable. let (naive_date_time2, naive_date_time1) = - with_timezone_to_naive_datetime(lhs_ts, rhs_ts, lhs_tz, rhs_tz, &scale_factor)?; + with_timezone_to_naive_datetime(lhs_ts, rhs_ts, lhs_tz, rhs_tz, &mode)?; let delta_secs = naive_date_time2.signed_duration_since(naive_date_time1); // 60 * 60 * 24 * 1000 = 86_400_000, number of millisecs in a day let number_of_millisecs_in_day: i64 = 86_400_000; - match scale_factor { - 1 => { + match mode { + IntervalMode::Milli => { let as_millisecs = delta_secs.num_milliseconds(); Ok(ScalarValue::IntervalDayTime(Some( IntervalDayTimeType::make_value( @@ -670,7 +684,7 @@ fn ts_sub_to_interval( ))) } // 60 * 60 * 24 * 1000_000_000 = 86_400_000_000_000, number of nanosecs in a day - 1_000_000 => { + IntervalMode::Nano => { let as_nanosecs = delta_secs.num_nanoseconds().ok_or_else(|| { DataFusionError::Execution(String::from( "timestamp difference cannot be shown in nanosecond precision", @@ -679,14 +693,11 @@ fn ts_sub_to_interval( Ok(ScalarValue::IntervalMonthDayNano(Some( IntervalMonthDayNanoType::make_value( 0, - (as_nanosecs / (number_of_millisecs_in_day * scale_factor)) as i32, - as_nanosecs % (number_of_millisecs_in_day * scale_factor), + (as_nanosecs / (number_of_millisecs_in_day * 1_000_000)) as i32, + as_nanosecs % (number_of_millisecs_in_day * 1_000_000), ), ))) } - _ => Err(DataFusionError::Execution(String::from( - "undefined scale factor", - ))), } } #[inline] @@ -695,16 +706,11 @@ fn with_timezone_to_naive_datetime( rhs_ts: &i64, lhs_tz: &Option, rhs_tz: &Option, - scale_factor: &i64, + mode: &IntervalMode, ) -> Result<(NaiveDateTime, NaiveDateTime), DataFusionError> { - let (naive_lhs, naive_rhs) = match scale_factor { - 1 => ms_to_naive_datetime(lhs_ts, rhs_ts)?, - 1_000_000 => ns_to_naive_datetime(lhs_ts, rhs_ts)?, - _ => { - return Err(DataFusionError::Execution(String::from( - "undefined scale factor", - ))) - } + let (naive_lhs, naive_rhs) = match mode { + IntervalMode::Milli => ms_to_naive_datetime(lhs_ts, rhs_ts)?, + IntervalMode::Nano => ns_to_naive_datetime(lhs_ts, rhs_ts)?, }; match (lhs_tz, rhs_tz) { @@ -4711,37 +4717,23 @@ mod tests { #[test] fn timestamp_op_tests() { // positive interval, edge cases - let vec_timestamps_next = timestamps_next(); - let vec_timestamps_prev = timestamps_prev(); - let expected_results = get_expected_results(1); - - for (idx, exp) in expected_results.iter().enumerate() { - assert_eq!( - *exp, - vec_timestamps_next[idx] - .sub(&vec_timestamps_prev[idx]) - .unwrap() - ) + let test_data = get_test_data(1); + + for (idx, exp) in test_data.iter().enumerate() { + assert_eq!(exp.2, test_data[idx].0.sub(&test_data[idx].1).unwrap()) } // negative interval, edge cases - let vec_timestamps_next = timestamps_prev(); - let vec_timestamps_prev = timestamps_next(); - let expected_results = get_expected_results(-1); - for (idx, exp) in expected_results.iter().enumerate() { - assert_eq!( - *exp, - vec_timestamps_next[idx] - .sub(&vec_timestamps_prev[idx]) - .unwrap() - ); + let test_data = get_test_data(-1); + for (idx, exp) in test_data.iter().enumerate() { + assert_eq!(exp.2, test_data[idx].1.sub(&test_data[idx].0).unwrap()); } } #[test] fn timestamp_op_random_tests() { // timestamp1 + (or -) interval = timestamp2 // timestamp2 - timestamp1 (or timestamp1 - timestamp2) = interval ? - let sample_size = 100000; + let sample_size = 1000000; let timestamps1 = get_random_timestamps(sample_size); let intervals = get_random_intervals(sample_size); // ts(sec) + interval(ns) = ts(sec); however, @@ -4772,231 +4764,254 @@ mod tests { } } - fn timestamps_next() -> Vec { - vec![ - ScalarValue::TimestampNanosecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_nano_opt(1, 0, 0, 000_000_000) - .unwrap() - .timestamp_nanos(), + fn get_test_data(sign: i32) -> Vec<(ScalarValue, ScalarValue, ScalarValue)> { + let test_data = vec![ + ( + // 1. test case + ScalarValue::TimestampNanosecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_nano_opt(1, 0, 0, 000_000_000) + .unwrap() + .timestamp_nanos(), + ), + Some("+01:00".to_string()), ), - Some("+01:00".to_string()), - ), - ScalarValue::TimestampMicrosecond( - Some( - NaiveDate::from_ymd_opt(2023, 3, 1) - .unwrap() - .and_hms_micro_opt(2, 0, 0, 000_000) - .unwrap() - .timestamp_micros(), + ScalarValue::TimestampNanosecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_nano_opt(0, 0, 0, 000_000_000) + .unwrap() + .timestamp_nanos(), + ), + Some("+00:00".to_string()), ), - Some("+01:00".to_string()), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(0, 0, 0), + )), ), - ScalarValue::TimestampMillisecond( - Some( - NaiveDate::from_ymd_opt(2023, 2, 11) - .unwrap() - .and_hms_milli_opt(10, 10, 0, 000) - .unwrap() - .timestamp_millis(), + // 2. test case + ( + ScalarValue::TimestampMicrosecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 1) + .unwrap() + .and_hms_micro_opt(2, 0, 0, 000_000) + .unwrap() + .timestamp_micros(), + ), + Some("+01:00".to_string()), ), - Some("+10:10".to_string()), - ), - ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2023, 3, 1) - .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap() - .timestamp(), + ScalarValue::TimestampMicrosecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_micro_opt(0, 0, 0, 000_000) + .unwrap() + .timestamp_micros(), + ), + Some("-01:00".to_string()), ), - Some("-11:59".to_string()), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(0, sign * 59, 0), + )), ), - ScalarValue::TimestampMillisecond( - Some( - NaiveDate::from_ymd_opt(2023, 3, 1) - .unwrap() - .and_hms_milli_opt(23, 58, 0, 250) - .unwrap() - .timestamp_millis(), + // 3. test case + ( + ScalarValue::TimestampMillisecond( + Some( + NaiveDate::from_ymd_opt(2023, 2, 11) + .unwrap() + .and_hms_milli_opt(10, 10, 0, 000) + .unwrap() + .timestamp_millis(), + ), + Some("+10:10".to_string()), ), - Some("+11:59".to_string()), - ), - ScalarValue::TimestampMicrosecond( - Some( - NaiveDate::from_ymd_opt(2023, 3, 1) - .unwrap() - .and_hms_micro_opt(0, 0, 0, 15) - .unwrap() - .timestamp_micros(), + ScalarValue::TimestampMillisecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_milli_opt(1, 0, 0, 000) + .unwrap() + .timestamp_millis(), + ), + Some("+01:00".to_string()), ), - None, + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + sign * 41, + 0, + ))), ), - ScalarValue::TimestampNanosecond( - Some( - NaiveDate::from_ymd_opt(2023, 3, 1) - .unwrap() - .and_hms_nano_opt(0, 0, 0, 22) - .unwrap() - .timestamp_nanos(), + // 4. test case + ( + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 1) + .unwrap() + .and_hms_opt(0, 0, 0) + .unwrap() + .timestamp(), + ), + Some("-11:59".to_string()), ), - None, - ), - ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2023, 3, 1) - .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap() - .timestamp(), + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_opt(23, 58, 0) + .unwrap() + .timestamp(), + ), + Some("+11:59".to_string()), ), - None, + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + sign * 59, + 0, + ))), ), - ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2023, 12, 1) - .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap() - .timestamp(), + // 5. test case + ( + ScalarValue::TimestampMillisecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 1) + .unwrap() + .and_hms_milli_opt(23, 58, 0, 250) + .unwrap() + .timestamp_millis(), + ), + Some("+11:59".to_string()), ), - None, - ), - ] - } - - fn timestamps_prev() -> Vec { - vec![ - ScalarValue::TimestampNanosecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_nano_opt(0, 0, 0, 000_000_000) - .unwrap() - .timestamp_nanos(), + ScalarValue::TimestampMillisecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_milli_opt(0, 0, 0, 000) + .unwrap() + .timestamp_millis(), + ), + Some("-11:59".to_string()), ), - Some("+00:00".to_string()), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + sign * 59, + sign * 250, + ))), ), - ScalarValue::TimestampMicrosecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_micro_opt(0, 0, 0, 000_000) - .unwrap() - .timestamp_micros(), + // 6. test case + ( + ScalarValue::TimestampMicrosecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 1) + .unwrap() + .and_hms_micro_opt(0, 0, 0, 15) + .unwrap() + .timestamp_micros(), + ), + None, ), - Some("-01:00".to_string()), - ), - ScalarValue::TimestampMillisecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_milli_opt(1, 0, 0, 000) - .unwrap() - .timestamp_millis(), + ScalarValue::TimestampMicrosecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 1) + .unwrap() + .and_hms_micro_opt(0, 0, 0, 000_000) + .unwrap() + .timestamp_micros(), + ), + None, ), - Some("+01:00".to_string()), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value( + 0, + sign * 59, + sign as i64 * 15_000, + ), + )), ), - ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_opt(23, 58, 0) - .unwrap() - .timestamp(), + // 7. test case + ( + ScalarValue::TimestampNanosecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 1) + .unwrap() + .and_hms_nano_opt(0, 0, 0, 22) + .unwrap() + .timestamp_nanos(), + ), + None, ), - Some("+11:59".to_string()), - ), - ScalarValue::TimestampMillisecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_milli_opt(0, 0, 0, 000) - .unwrap() - .timestamp_millis(), + ScalarValue::TimestampNanosecond( + Some( + NaiveDate::from_ymd_opt(2023, 1, 31) + .unwrap() + .and_hms_nano_opt(0, 0, 0, 000_000_000) + .unwrap() + .timestamp_nanos(), + ), + None, ), - Some("-11:59".to_string()), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(0, sign * 29, sign as i64 * 22), + )), ), - ScalarValue::TimestampMicrosecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_micro_opt(0, 0, 0, 000_000) - .unwrap() - .timestamp_micros(), + // 8. test case + ( + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 1) + .unwrap() + .and_hms_opt(0, 0, 0) + .unwrap() + .timestamp(), + ), + None, ), - None, - ), - ScalarValue::TimestampNanosecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 31) - .unwrap() - .and_hms_nano_opt(0, 0, 0, 000_000_000) - .unwrap() - .timestamp_nanos(), + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2021, 12, 30) + .unwrap() + .and_hms_opt(0, 0, 30) + .unwrap() + .timestamp(), + ), + None, ), - None, + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + sign * 425, + sign * 86370000, + ))), ), - ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2021, 12, 30) - .unwrap() - .and_hms_opt(0, 0, 30) - .unwrap() - .timestamp(), + // 9. test case + ( + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2023, 12, 1) + .unwrap() + .and_hms_opt(0, 0, 0) + .unwrap() + .timestamp(), + ), + None, ), - None, - ), - ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(1980, 11, 1) - .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap() - .timestamp(), + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(1980, 11, 1) + .unwrap() + .and_hms_opt(0, 0, 0) + .unwrap() + .timestamp(), + ), + None, ), - None, + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + sign * 15735, + 0, + ))), ), - ] - } + ]; - fn get_expected_results(sign: i32) -> Vec { - vec![ - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(0, 0, 0), - )), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(0, sign * 59, 0), - )), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - sign * 41, - 0, - ))), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - sign * 59, - 0, - ))), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - sign * 59, - sign * 250, - ))), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(0, sign * 59, sign as i64 * 15_000), - )), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(0, sign * 29, sign as i64 * 22), - )), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - sign * 425, - sign * 86370000, - ))), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - sign * 15735, - 0, - ))), - ] + test_data } fn get_random_timestamps(sample_size: u64) -> Vec { From 968a6824af09ccb9a5581d8c238ebac1ab0dff1f Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Mon, 13 Mar 2023 13:57:46 +0300 Subject: [PATCH 14/23] minor test fix --- datafusion/common/src/scalar.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 963e60555144..cee1c4a4bc61 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -4719,14 +4719,14 @@ mod tests { // positive interval, edge cases let test_data = get_test_data(1); - for (idx, exp) in test_data.iter().enumerate() { - assert_eq!(exp.2, test_data[idx].0.sub(&test_data[idx].1).unwrap()) + for (lhs, rhs, expected) in test_data.iter() { + assert_eq!(expected, &lhs.sub(rhs).unwrap()) } // negative interval, edge cases let test_data = get_test_data(-1); - for (idx, exp) in test_data.iter().enumerate() { - assert_eq!(exp.2, test_data[idx].1.sub(&test_data[idx].0).unwrap()); + for (rhs, lhs, expected) in test_data.iter() { + assert_eq!(expected, &lhs.sub(rhs).unwrap()); } } #[test] From ed637796a7177ecc52986a06b948a696fe80a00f Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Mon, 13 Mar 2023 15:32:10 +0300 Subject: [PATCH 15/23] Update scalar.rs --- datafusion/common/src/scalar.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index cee1c4a4bc61..46e6a25a74a0 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -4767,7 +4767,7 @@ mod tests { fn get_test_data(sign: i32) -> Vec<(ScalarValue, ScalarValue, ScalarValue)> { let test_data = vec![ ( - // 1. test case + // 1st test case ScalarValue::TimestampNanosecond( Some( NaiveDate::from_ymd_opt(2023, 1, 1) @@ -4792,7 +4792,7 @@ mod tests { IntervalMonthDayNanoType::make_value(0, 0, 0), )), ), - // 2. test case + // 2nd test case ( ScalarValue::TimestampMicrosecond( Some( @@ -4818,7 +4818,7 @@ mod tests { IntervalMonthDayNanoType::make_value(0, sign * 59, 0), )), ), - // 3. test case + // 3rd test case ( ScalarValue::TimestampMillisecond( Some( @@ -4845,7 +4845,7 @@ mod tests { 0, ))), ), - // 4. test case + // 4th test case ( ScalarValue::TimestampSecond( Some( @@ -4872,7 +4872,7 @@ mod tests { 0, ))), ), - // 5. test case + // 5th test case ( ScalarValue::TimestampMillisecond( Some( @@ -4899,7 +4899,7 @@ mod tests { sign * 250, ))), ), - // 6. test case + // 6th test case ( ScalarValue::TimestampMicrosecond( Some( @@ -4929,7 +4929,7 @@ mod tests { ), )), ), - // 7. test case + // 7th test case ( ScalarValue::TimestampNanosecond( Some( @@ -4955,7 +4955,7 @@ mod tests { IntervalMonthDayNanoType::make_value(0, sign * 29, sign as i64 * 22), )), ), - // 8. test case + // 8th test case ( ScalarValue::TimestampSecond( Some( @@ -4982,7 +4982,7 @@ mod tests { sign * 86370000, ))), ), - // 9. test case + // 9th test case ( ScalarValue::TimestampSecond( Some( From 68ea6479333620fe583f601608cd24454832442a Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Mon, 13 Mar 2023 18:10:59 -0500 Subject: [PATCH 16/23] Refactoring and simplifications --- datafusion-cli/Cargo.lock | 39 ++-- datafusion/common/Cargo.toml | 3 +- datafusion/common/src/scalar.rs | 364 ++++++++++++++------------------ 3 files changed, 178 insertions(+), 228 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 6a2763502b9e..84b4ec7101f8 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -741,7 +741,6 @@ dependencies = [ "num_cpus", "object_store", "parquet", - "rand", "sqlparser", ] @@ -1908,9 +1907,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.24" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50686e0021c4136d1d453b2dfe059902278681512a34d4248435dc34b6b5c8ec" +checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" dependencies = [ "proc-macro2", ] @@ -2175,7 +2174,6 @@ name = "serde" version = "1.0.155" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71f2b4817415c6d4210bfe1c7bfcf4801b2d904cb4d0e1a8fdb651013c9e86b8" - dependencies = [ "serde_derive", ] @@ -2185,7 +2183,6 @@ name = "serde_derive" version = "1.0.155" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d071a94a3fac4aff69d023a7f411e33f40f3483f8c5190b1953822b6b76d7630" - dependencies = [ "proc-macro2", "quote", @@ -2832,9 +2829,9 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", @@ -2847,45 +2844,45 @@ dependencies = [ [[package]] name = "windows_aarch64_gnullvm" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" [[package]] name = "windows_aarch64_msvc" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" [[package]] name = "windows_i686_gnu" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" [[package]] name = "windows_i686_msvc" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" [[package]] name = "windows_x86_64_gnu" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" [[package]] name = "windows_x86_64_gnullvm" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" [[package]] name = "windows_x86_64_msvc" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" [[package]] name = "winreg" diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 4780962f3e8e..444ce9a2e0ae 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -47,6 +47,7 @@ num_cpus = "1.13.0" object_store = { version = "0.5.4", default-features = false, optional = true } parquet = { workspace = true, default-features = false, optional = true } pyo3 = { version = "0.18.0", optional = true } -rand = "0.8.4" sqlparser = "0.32" +[dev-dependencies] +rand = "0.8.4" diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 46e6a25a74a0..bedbd1a328c5 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -464,6 +464,71 @@ macro_rules! unsigned_subtraction_error { } macro_rules! impl_op { + ($LHS:expr, $RHS:expr, +) => { + impl_op_symmetric!($LHS, $RHS, +) + }; + ($LHS:expr, $RHS:expr, -) => { + match ($LHS, $RHS) { + ( + ScalarValue::TimestampSecond(Some(ts_lhs), tz_lhs), + ScalarValue::TimestampSecond(Some(ts_rhs), tz_rhs), + ) => { + let err = || { + DataFusionError::Execution( + "Overflow while converting seconds to milliseconds".to_string(), + ) + }; + ts_sub_to_interval( + ts_lhs.checked_mul(1_000).ok_or_else(err)?, + ts_rhs.checked_mul(1_000).ok_or_else(err)?, + &tz_lhs, + &tz_rhs, + IntervalMode::Milli, + ) + }, + ( + ScalarValue::TimestampMillisecond(Some(ts_lhs), tz_lhs), + ScalarValue::TimestampMillisecond(Some(ts_rhs), tz_rhs), + ) => ts_sub_to_interval( + *ts_lhs, + *ts_rhs, + tz_lhs, + tz_rhs, + IntervalMode::Milli, + ), + ( + ScalarValue::TimestampMicrosecond(Some(ts_lhs), tz_lhs), + ScalarValue::TimestampMicrosecond(Some(ts_rhs), tz_rhs), + ) => { + let err = || { + DataFusionError::Execution( + "Overflow while converting microseconds to nanoseconds".to_string(), + ) + }; + ts_sub_to_interval( + ts_lhs.checked_mul(1_000).ok_or_else(err)?, + ts_rhs.checked_mul(1_000).ok_or_else(err)?, + tz_lhs, + tz_rhs, + IntervalMode::Nano, + ) + }, + ( + ScalarValue::TimestampNanosecond(Some(ts_lhs), tz_lhs), + ScalarValue::TimestampNanosecond(Some(ts_rhs), tz_rhs), + ) => ts_sub_to_interval( + *ts_lhs, + *ts_rhs, + tz_lhs, + tz_rhs, + IntervalMode::Nano, + ), + _ => impl_op_symmetric!($LHS, $RHS, -) + } + }; +} + +macro_rules! impl_op_symmetric { ($LHS:expr, $RHS:expr, $OPERATION:tt) => { match ($LHS, $RHS) { // Binary operations on arguments with the same type: @@ -503,94 +568,6 @@ macro_rules! impl_op { (ScalarValue::Int8(lhs), ScalarValue::Int8(rhs)) => { primitive_op!(lhs, rhs, Int8, $OPERATION) } - ( - ScalarValue::TimestampSecond(Some(ts_lhs), tz_lhs), - ScalarValue::TimestampSecond(Some(ts_rhs), tz_rhs), - ) => match get_sign!($OPERATION) { - -1 => { - let err = || { - DataFusionError::Execution( - "Overflow while conversion from second to millisecond" - .to_string(), - ) - }; - Ok(ts_sub_to_interval( - &ts_lhs.checked_mul(1_000).ok_or_else(err)?, - &ts_rhs.checked_mul(1_000).ok_or_else(err)?, - &tz_lhs, - &tz_rhs, - IntervalMode::Milli, - )?) - } - _ => Err(DataFusionError::Internal(format!( - "Operator {} is not implemented for {:?} and {:?}", - stringify!($OPERATION), - $LHS, - $RHS - ))), - }, - ( - ScalarValue::TimestampMillisecond(Some(ts_lhs), tz_lhs), - ScalarValue::TimestampMillisecond(Some(ts_rhs), tz_rhs), - ) => match get_sign!($OPERATION) { - -1 => Ok(ts_sub_to_interval( - &ts_lhs, - &ts_rhs, - &tz_lhs, - &tz_rhs, - IntervalMode::Milli, - )?), - _ => Err(DataFusionError::Internal(format!( - "Operator {} is not implemented for {:?} and {:?}", - stringify!($OPERATION), - $LHS, - $RHS - ))), - }, - ( - ScalarValue::TimestampMicrosecond(Some(ts_lhs), tz_lhs), - ScalarValue::TimestampMicrosecond(Some(ts_rhs), tz_rhs), - ) => match get_sign!($OPERATION) { - -1 => { - let err = || { - DataFusionError::Execution( - "Overflow while conversion from microsecond to nanosecond" - .to_string(), - ) - }; - Ok(ts_sub_to_interval( - &ts_lhs.checked_mul(1_000).ok_or_else(err)?, - &ts_rhs.checked_mul(1_000).ok_or_else(err)?, - &tz_lhs, - &tz_rhs, - IntervalMode::Nano, - )?) - } - _ => Err(DataFusionError::Internal(format!( - "Operator {} is not implemented for {:?} and {:?}", - stringify!($OPERATION), - $LHS, - $RHS - ))), - }, - ( - ScalarValue::TimestampNanosecond(Some(ts_lhs), tz_lhs), - ScalarValue::TimestampNanosecond(Some(ts_rhs), tz_rhs), - ) => match get_sign!($OPERATION) { - -1 => Ok(ts_sub_to_interval( - &ts_lhs, - &ts_rhs, - &tz_lhs, - &tz_rhs, - IntervalMode::Nano, - )?), - _ => Err(DataFusionError::Internal(format!( - "Operator {} is not implemented for {:?} and {:?}", - stringify!($OPERATION), - $LHS, - $RHS - ))), - }, // Binary operations on arguments with different types: (ScalarValue::Date32(Some(days)), _) => { let value = date32_add(*days, $RHS, get_sign!($OPERATION))?; @@ -651,184 +628,156 @@ macro_rules! get_sign { }; } +#[derive(Clone, Copy)] enum IntervalMode { Milli, Nano, } -// Timestamp(sec) and Timestamp(millisec) difference is resulting as Interval(days, millis) -// Timestamp(microsec) and Tiemstamp(nanosec) difference is resulting as Interval(days, nanos) + +/// This function computes subtracts `rhs_ts` from `lhs_ts`, taking timezones +/// into account when given. Units of the resulting interval is specified by +/// the argument `mode`. +/// The default behavior of Datafusion is the following: +/// - When subtracting timestamps at seconds/milliseconds precision, the output +/// interval will have the type [`IntervalDayTimeType`]. +/// - When subtracting timestamps at microseconds/nanoseconds precision, the +/// output interval will have the type [`IntervalMonthDayNano`]. fn ts_sub_to_interval( - lhs_ts: &i64, - rhs_ts: &i64, + lhs_ts: i64, + rhs_ts: i64, lhs_tz: &Option, rhs_tz: &Option, mode: IntervalMode, ) -> Result { - // Conversion of integer and string-typed timestamps to NaiveDateTime objects - // Timezone offsets are added also if applicable. - let (naive_date_time2, naive_date_time1) = - with_timezone_to_naive_datetime(lhs_ts, rhs_ts, lhs_tz, rhs_tz, &mode)?; - - let delta_secs = naive_date_time2.signed_duration_since(naive_date_time1); + let lhs_dt = with_timezone_to_naive_datetime(lhs_ts, lhs_tz, mode)?; + let rhs_dt = with_timezone_to_naive_datetime(rhs_ts, rhs_tz, mode)?; + let delta_secs = lhs_dt.signed_duration_since(rhs_dt); - // 60 * 60 * 24 * 1000 = 86_400_000, number of millisecs in a day - let number_of_millisecs_in_day: i64 = 86_400_000; match mode { IntervalMode::Milli => { + const MILLISECS_IN_ONE_DAY: i64 = 86_400_000; let as_millisecs = delta_secs.num_milliseconds(); Ok(ScalarValue::IntervalDayTime(Some( IntervalDayTimeType::make_value( - (as_millisecs / number_of_millisecs_in_day) as i32, - (as_millisecs % number_of_millisecs_in_day) as i32, + (as_millisecs / MILLISECS_IN_ONE_DAY) as i32, + (as_millisecs % MILLISECS_IN_ONE_DAY) as i32, ), ))) } - // 60 * 60 * 24 * 1000_000_000 = 86_400_000_000_000, number of nanosecs in a day IntervalMode::Nano => { + const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000; let as_nanosecs = delta_secs.num_nanoseconds().ok_or_else(|| { DataFusionError::Execution(String::from( - "timestamp difference cannot be shown in nanosecond precision", + "Can not compute timestamp differences with nanosecond precision", )) })?; Ok(ScalarValue::IntervalMonthDayNano(Some( IntervalMonthDayNanoType::make_value( 0, - (as_nanosecs / (number_of_millisecs_in_day * 1_000_000)) as i32, - as_nanosecs % (number_of_millisecs_in_day * 1_000_000), + (as_nanosecs / NANOSECS_IN_ONE_DAY) as i32, + as_nanosecs % NANOSECS_IN_ONE_DAY, ), ))) } } } + +/// This function creates the [`NaiveDateTime`] object corresponding to the +/// given timestamp using the units (tick size) implied by argument `mode`. #[inline] fn with_timezone_to_naive_datetime( - lhs_ts: &i64, - rhs_ts: &i64, - lhs_tz: &Option, - rhs_tz: &Option, - mode: &IntervalMode, -) -> Result<(NaiveDateTime, NaiveDateTime), DataFusionError> { - let (naive_lhs, naive_rhs) = match mode { - IntervalMode::Milli => ms_to_naive_datetime(lhs_ts, rhs_ts)?, - IntervalMode::Nano => ns_to_naive_datetime(lhs_ts, rhs_ts)?, + ts: i64, + tz: &Option, + mode: IntervalMode, +) -> Result { + let mut result = if let IntervalMode::Milli = mode { + ticks_to_naive_datetime::<1_000_000>(ts) + } else { + ticks_to_naive_datetime::<1>(ts) + }?; + if let Some(tz) = tz { + let offset = parse_tz_to_offset(tz)?; + result = DateTime::::from_utc(result, offset).naive_local(); }; - - match (lhs_tz, rhs_tz) { - (Some(l), Some(r)) => match (parse_tz_to_offset(l), parse_tz_to_offset(r)) { - (Ok(l), Ok(r)) => Ok(( - DateTime::::from_utc(naive_lhs, l).naive_local(), - DateTime::::from_utc(naive_rhs, r).naive_local(), - )), - (_, _) => Ok((naive_lhs, naive_rhs)), - }, - (_, _) => Ok((naive_lhs, naive_rhs)), - } -} -#[inline] -fn ms_to_naive_datetime( - lhs_ts_ms: &i64, - rhs_ts_ms: &i64, -) -> Result<(NaiveDateTime, NaiveDateTime), DataFusionError> { - match ( - NaiveDateTime::from_timestamp_opt( - lhs_ts_ms / 1_000, - (lhs_ts_ms % 1_000) as u32 * 1_000_000, - ), - NaiveDateTime::from_timestamp_opt( - rhs_ts_ms / 1_000, - (rhs_ts_ms % 1_000) as u32 * 1_000_000, - ), - ) { - (Some(x), Some(y)) => Ok((x, y)), - (x, y) => Err(DataFusionError::Execution(format!( - "timestamps {x:?} or {y:?} cannot be converted to NaiveDateTime", - ))), - } + Ok(result) } + +/// This function creates the [`NaiveDateTime`] object corresponding to the +/// given timestamp, whose tick size is specified by `UNIT_NANOS`. #[inline] -fn ns_to_naive_datetime( - lhs_ts_ns: &i64, - rhs_ts_ns: &i64, -) -> Result<(NaiveDateTime, NaiveDateTime), DataFusionError> { - match ( - NaiveDateTime::from_timestamp_opt( - lhs_ts_ns / 1_000_000_000, - (lhs_ts_ns % 1_000_000_000) as u32, - ), - NaiveDateTime::from_timestamp_opt( - rhs_ts_ns / 1_000_000_000, - (rhs_ts_ns % 1_000_000_000) as u32, - ), - ) { - (Some(x), Some(y)) => Ok((x, y)), - (x, y) => Err(DataFusionError::Execution(format!( - "timestamps {x:?} or {y:?} cannot be converted to NaiveDateTime", - ))), - } +fn ticks_to_naive_datetime(ticks: i64) -> Result { + NaiveDateTime::from_timestamp_opt( + (ticks * UNIT_NANOS) / 1_000_000_000, + ((ticks * UNIT_NANOS) % 1_000_000_000) as u32, + ) + .ok_or_else(|| { + DataFusionError::Execution( + "Can not convert given timestamp to a NaiveDateTime".to_string(), + ) + }) } -// This function parses as the format of "+HH:MM", for example, "+05:30" -#[inline] -fn parse_tz_to_offset(tz: &str) -> Result { - let err_str = &String::from("error while parsing timezone"); - let err = || DataFusionError::Execution(err_str.to_string()); - let sign = tz.chars().next().ok_or_else(err)?; +/// This function parses `tz` according to the format "+HH:MM" (e.g. "+05:30") +/// and retuns a [`FixedOffset`] object. +#[inline] +fn parse_tz_to_offset(tz: &str) -> Result { + const ERR_MSG: &str = "Can not parse timezone"; + let sign = tz + .chars() + .next() + .ok_or_else(|| DataFusionError::Execution(ERR_MSG.to_string()))?; let hours = tz[1..3] .parse::() - .map_err(|_e| DataFusionError::Execution(err_str.to_string()))?; + .map_err(|_| DataFusionError::Execution(ERR_MSG.to_string()))?; let minutes = tz[4..6] .parse::() - .map_err(|_e| DataFusionError::Execution(err_str.to_string()))?; - let timezone_offset = match sign { - '-' => FixedOffset::east_opt(hours * 3600 + minutes * 60).ok_or_else(err)?, - '+' => FixedOffset::west_opt(hours * 3600 + minutes * 60).ok_or_else(err)?, - _ => { - return Err(DataFusionError::Execution(err_str.to_string())); - } - }; - Ok(timezone_offset) + .map_err(|_| DataFusionError::Execution(ERR_MSG.to_string()))?; + match sign { + '-' => FixedOffset::east_opt(hours * 3600 + minutes * 60), + '+' => FixedOffset::west_opt(hours * 3600 + minutes * 60), + _ => None, + } + .ok_or_else(|| DataFusionError::Execution(ERR_MSG.to_string())) } #[inline] pub fn date32_add(days: i32, scalar: &ScalarValue, sign: i32) -> Result { let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); let prior = epoch.add(Duration::days(days as i64)); - let posterior = do_date_math(prior, scalar, sign)?; - Ok(posterior.sub(epoch).num_days() as i32) + do_date_math(prior, scalar, sign).map(|d| d.sub(epoch).num_days() as i32) } #[inline] pub fn date64_add(ms: i64, scalar: &ScalarValue, sign: i32) -> Result { let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); let prior = epoch.add(Duration::milliseconds(ms)); - let posterior = do_date_math(prior, scalar, sign)?; - Ok(posterior.sub(epoch).num_milliseconds()) + do_date_math(prior, scalar, sign).map(|d| d.sub(epoch).num_milliseconds()) } #[inline] pub fn seconds_add(ts_s: i64, scalar: &ScalarValue, sign: i32) -> Result { - Ok(do_date_time_math(ts_s, 0, scalar, sign)?.timestamp()) + do_date_time_math(ts_s, 0, scalar, sign).map(|dt| dt.timestamp()) } #[inline] pub fn milliseconds_add(ts_ms: i64, scalar: &ScalarValue, sign: i32) -> Result { let secs = ts_ms / 1000; let nsecs = ((ts_ms % 1000) * 1_000_000) as u32; - Ok(do_date_time_math(secs, nsecs, scalar, sign)?.timestamp_millis()) + do_date_time_math(secs, nsecs, scalar, sign).map(|dt| dt.timestamp_millis()) } #[inline] pub fn microseconds_add(ts_us: i64, scalar: &ScalarValue, sign: i32) -> Result { let secs = ts_us / 1_000_000; let nsecs = ((ts_us % 1_000_000) * 1000) as u32; - Ok(do_date_time_math(secs, nsecs, scalar, sign)?.timestamp_nanos() / 1000) + do_date_time_math(secs, nsecs, scalar, sign).map(|dt| dt.timestamp_nanos() / 1000) } #[inline] pub fn nanoseconds_add(ts_ns: i64, scalar: &ScalarValue, sign: i32) -> Result { let secs = ts_ns / 1_000_000_000; let nsecs = (ts_ns % 1_000_000_000) as u32; - Ok(do_date_time_math(secs, nsecs, scalar, sign)?.timestamp_nanos()) + do_date_time_math(secs, nsecs, scalar, sign).map(|dt| dt.timestamp_nanos()) } #[inline] @@ -4717,16 +4666,15 @@ mod tests { #[test] fn timestamp_op_tests() { // positive interval, edge cases - let test_data = get_test_data(1); - - for (lhs, rhs, expected) in test_data.iter() { - assert_eq!(expected, &lhs.sub(rhs).unwrap()) + let test_data = get_timestamp_test_data(1); + for (lhs, rhs, expected) in test_data.into_iter() { + assert_eq!(expected, lhs.sub(rhs).unwrap()) } // negative interval, edge cases - let test_data = get_test_data(-1); - for (rhs, lhs, expected) in test_data.iter() { - assert_eq!(expected, &lhs.sub(rhs).unwrap()); + let test_data = get_timestamp_test_data(-1); + for (rhs, lhs, expected) in test_data.into_iter() { + assert_eq!(expected, lhs.sub(rhs).unwrap()); } } #[test] @@ -4764,8 +4712,10 @@ mod tests { } } - fn get_test_data(sign: i32) -> Vec<(ScalarValue, ScalarValue, ScalarValue)> { - let test_data = vec![ + fn get_timestamp_test_data( + sign: i32, + ) -> Vec<(ScalarValue, ScalarValue, ScalarValue)> { + vec![ ( // 1st test case ScalarValue::TimestampNanosecond( @@ -5009,9 +4959,7 @@ mod tests { 0, ))), ), - ]; - - test_data + ] } fn get_random_timestamps(sample_size: u64) -> Vec { @@ -5081,30 +5029,34 @@ mod tests { let vector_size = sample_size; let mut intervals = vec![]; let mut rng = rand::thread_rng(); + const SECS_IN_ONE_DAY: i32 = 86_400; + const MILLISECS_IN_ONE_DAY: i32 = 86_400_000; + const MICROSECS_IN_ONE_DAY: i64 = 86_400_000_000; + const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000; for i in 0..vector_size { if i % 4 == 0 { let days = rng.gen_range(0..5000); // to not break second precision - let millis = rng.gen_range(0..86_400) * 1000; + let millis = rng.gen_range(0..SECS_IN_ONE_DAY) * 1000; intervals.push(ScalarValue::IntervalDayTime(Some( IntervalDayTimeType::make_value(days, millis), ))) } else if i % 4 == 1 { let days = rng.gen_range(0..5000); - let millisec = rng.gen_range(0..86_400_000); + let millisec = rng.gen_range(0..MILLISECS_IN_ONE_DAY); intervals.push(ScalarValue::IntervalDayTime(Some( IntervalDayTimeType::make_value(days, millisec), ))) } else if i % 4 == 2 { let days = rng.gen_range(0..5000); // to not break microsec precision - let nanosec = rng.gen_range(0..86_400_000_000) * 1000; + let nanosec = rng.gen_range(0..MICROSECS_IN_ONE_DAY) * 1000; intervals.push(ScalarValue::IntervalMonthDayNano(Some( IntervalMonthDayNanoType::make_value(0, days, nanosec), ))) } else { let days = rng.gen_range(0..5000); - let nanosec = rng.gen_range(0..86_400_000_000_000); + let nanosec = rng.gen_range(0..NANOSECS_IN_ONE_DAY); intervals.push(ScalarValue::IntervalMonthDayNano(Some( IntervalMonthDayNanoType::make_value(0, days, nanosec), ))); From ed0446676a8f3bb35b0b3544d195fd94f600cfbe Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Mon, 13 Mar 2023 22:23:02 -0500 Subject: [PATCH 17/23] Make ScalarValue support interval comparison --- datafusion/common/src/scalar.rs | 486 +++++++++++++++++++++++++++++++- 1 file changed, 481 insertions(+), 5 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index bedbd1a328c5..3d7ff1314f27 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -45,6 +45,12 @@ use arrow::{ }; use chrono::{DateTime, Datelike, Duration, FixedOffset, NaiveDate, NaiveDateTime}; +// Constants we use throughout this file: +const MILLISECS_IN_ONE_DAY: i64 = 86_400_000; +const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000; +const MILLISECS_IN_ONE_MONTH: i64 = 2_592_000_000; // assuming 30 days. +const NANOSECS_IN_ONE_MONTH: i128 = 2_592_000_000_000_000; // assuming 30 days. + /// Represents a dynamically typed, nullable single value. /// This is the single-valued counter-part to arrow's [`Array`]. /// @@ -199,10 +205,28 @@ impl PartialEq for ScalarValue { (TimestampNanosecond(v1, _), TimestampNanosecond(v2, _)) => v1.eq(v2), (TimestampNanosecond(_, _), _) => false, (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.eq(v2), + (IntervalYearMonth(v1), IntervalDayTime(v2)) => { + ym_to_milli(v1).eq(&dt_to_milli(v2)) + } + (IntervalYearMonth(v1), IntervalMonthDayNano(v2)) => { + ym_to_nano(v1).eq(&mdn_to_nano(v2)) + } (IntervalYearMonth(_), _) => false, (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.eq(v2), + (IntervalDayTime(v1), IntervalYearMonth(v2)) => { + dt_to_milli(v1).eq(&ym_to_milli(v2)) + } + (IntervalDayTime(v1), IntervalMonthDayNano(v2)) => { + dt_to_nano(v1).eq(&mdn_to_nano(v2)) + } (IntervalDayTime(_), _) => false, (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.eq(v2), + (IntervalMonthDayNano(v1), IntervalYearMonth(v2)) => { + mdn_to_nano(v1).eq(&ym_to_nano(v2)) + } + (IntervalMonthDayNano(v1), IntervalDayTime(v2)) => { + mdn_to_nano(v1).eq(&dt_to_nano(v2)) + } (IntervalMonthDayNano(_), _) => false, (Struct(v1, t1), Struct(v2, t2)) => v1.eq(v2) && t1.eq(t2), (Struct(_, _), _) => false, @@ -304,10 +328,28 @@ impl PartialOrd for ScalarValue { } (TimestampNanosecond(_, _), _) => None, (IntervalYearMonth(v1), IntervalYearMonth(v2)) => v1.partial_cmp(v2), + (IntervalYearMonth(v1), IntervalDayTime(v2)) => { + ym_to_milli(v1).partial_cmp(&dt_to_milli(v2)) + } + (IntervalYearMonth(v1), IntervalMonthDayNano(v2)) => { + ym_to_nano(v1).partial_cmp(&mdn_to_nano(v2)) + } (IntervalYearMonth(_), _) => None, (IntervalDayTime(v1), IntervalDayTime(v2)) => v1.partial_cmp(v2), + (IntervalDayTime(v1), IntervalYearMonth(v2)) => { + dt_to_milli(v1).partial_cmp(&ym_to_milli(v2)) + } + (IntervalDayTime(v1), IntervalMonthDayNano(v2)) => { + dt_to_nano(v1).partial_cmp(&mdn_to_nano(v2)) + } (IntervalDayTime(_), _) => None, (IntervalMonthDayNano(v1), IntervalMonthDayNano(v2)) => v1.partial_cmp(v2), + (IntervalMonthDayNano(v1), IntervalYearMonth(v2)) => { + mdn_to_nano(v1).partial_cmp(&ym_to_nano(v2)) + } + (IntervalMonthDayNano(v1), IntervalDayTime(v2)) => { + mdn_to_nano(v1).partial_cmp(&dt_to_nano(v2)) + } (IntervalMonthDayNano(_), _) => None, (Struct(v1, t1), Struct(v2, t2)) => { if t1.eq(t2) { @@ -332,6 +374,52 @@ impl PartialOrd for ScalarValue { } } +/// This function computes the duration (in milliseconds) of the given +/// year-month-interval. +#[inline] +fn ym_to_milli(val: &Option) -> Option { + val.map(|value| (value as i64) * MILLISECS_IN_ONE_MONTH) +} + +/// This function computes the duration (in nanoseconds) of the given +/// year-month-interval. +#[inline] +fn ym_to_nano(val: &Option) -> Option { + val.map(|value| (value as i128) * NANOSECS_IN_ONE_MONTH) +} + +/// This function computes the duration (in milliseconds) of the given +/// daytime-interval. +#[inline] +fn dt_to_milli(val: &Option) -> Option { + val.map(|val| { + let (days, millis) = IntervalDayTimeType::to_parts(val); + (days as i64) * MILLISECS_IN_ONE_DAY + (millis as i64) + }) +} + +/// This function computes the duration (in nanoseconds) of the given +/// daytime-interval. +#[inline] +fn dt_to_nano(val: &Option) -> Option { + val.map(|val| { + let (days, millis) = IntervalDayTimeType::to_parts(val); + (days as i128) * (NANOSECS_IN_ONE_DAY as i128) + (millis as i128) * 1_000_000 + }) +} + +/// This function computes the duration (in nanoseconds) of the given +/// month-day-nano-interval. Assumes a month is 30 days long. +#[inline] +fn mdn_to_nano(val: &Option) -> Option { + val.map(|val| { + let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(val); + (months as i128) * NANOSECS_IN_ONE_MONTH + + (days as i128) * (NANOSECS_IN_ONE_DAY as i128) + + (nanos as i128) + }) +} + impl Eq for ScalarValue {} // TODO implement this in arrow-rs with simd @@ -568,6 +656,43 @@ macro_rules! impl_op_symmetric { (ScalarValue::Int8(lhs), ScalarValue::Int8(rhs)) => { primitive_op!(lhs, rhs, Int8, $OPERATION) } + ( + ScalarValue::IntervalYearMonth(Some(lhs)), + ScalarValue::IntervalYearMonth(Some(rhs)), + ) => Ok(ScalarValue::IntervalYearMonth(Some( + IntervalYearMonthType::make_value(0, lhs + rhs * get_sign!($OPERATION)), + ))), + ( + ScalarValue::IntervalDayTime(Some(lhs)), + ScalarValue::IntervalDayTime(Some(rhs)), + ) => { + let sign = get_sign!($OPERATION); + let (lhs_days, lhs_millis) = IntervalDayTimeType::to_parts(*lhs); + let (rhs_days, rhs_millis) = IntervalDayTimeType::to_parts(*rhs); + Ok(ScalarValue::IntervalDayTime(Some( + IntervalDayTimeType::make_value( + lhs_days + rhs_days * sign, + lhs_millis + rhs_millis * sign, + ), + ))) + } + ( + ScalarValue::IntervalMonthDayNano(Some(lhs)), + ScalarValue::IntervalMonthDayNano(Some(rhs)), + ) => { + let sign = get_sign!($OPERATION); + let (lhs_months, lhs_days, lhs_nanos) = + IntervalMonthDayNanoType::to_parts(*lhs); + let (rhs_months, rhs_days, rhs_nanos) = + IntervalMonthDayNanoType::to_parts(*rhs); + Ok(ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value( + lhs_months + rhs_months * sign, + lhs_days + rhs_days * sign, + lhs_nanos + rhs_nanos * (sign as i64), + ), + ))) + } // Binary operations on arguments with different types: (ScalarValue::Date32(Some(days)), _) => { let value = date32_add(*days, $RHS, get_sign!($OPERATION))?; @@ -609,6 +734,30 @@ macro_rules! impl_op_symmetric { let value = nanoseconds_add(*ts_ns, $LHS, get_sign!($OPERATION))?; Ok(ScalarValue::TimestampNanosecond(Some(value), zone.clone())) } + ( + ScalarValue::IntervalYearMonth(Some(lhs)), + ScalarValue::IntervalDayTime(Some(rhs)), + ) => op_ym_dt(*lhs, *rhs, get_sign!($OPERATION), false), + ( + ScalarValue::IntervalYearMonth(Some(lhs)), + ScalarValue::IntervalMonthDayNano(Some(rhs)), + ) => op_ym_mdn(*lhs, *rhs, get_sign!($OPERATION), false), + ( + ScalarValue::IntervalDayTime(Some(lhs)), + ScalarValue::IntervalYearMonth(Some(rhs)), + ) => op_ym_dt(*rhs, *lhs, get_sign!($OPERATION), true), + ( + ScalarValue::IntervalDayTime(Some(lhs)), + ScalarValue::IntervalMonthDayNano(Some(rhs)), + ) => op_dt_mdn(*lhs, *rhs, get_sign!($OPERATION), false), + ( + ScalarValue::IntervalMonthDayNano(Some(lhs)), + ScalarValue::IntervalYearMonth(Some(rhs)), + ) => op_ym_mdn(*rhs, *lhs, get_sign!($OPERATION), true), + ( + ScalarValue::IntervalMonthDayNano(Some(lhs)), + ScalarValue::IntervalDayTime(Some(rhs)), + ) => op_dt_mdn(*rhs, *lhs, get_sign!($OPERATION), true), _ => Err(DataFusionError::Internal(format!( "Operator {} is not implemented for types {:?} and {:?}", stringify!($OPERATION), @@ -619,6 +768,72 @@ macro_rules! impl_op_symmetric { }; } +/// This function adds/subtracts two "raw" intervals (`lhs` and `rhs`) of different +/// types ([`IntervalYearMonthType`] and [`IntervalDayTimeType`], respectively). +/// The argument `sign` chooses between addition and subtraction, the argument +/// `commute` swaps `lhs` and `rhs`. The return value is an interval [`ScalarValue`] +/// with type data type [`IntervalMonthDayNanoType`]. +#[inline] +fn op_ym_dt(mut lhs: i32, rhs: i64, sign: i32, commute: bool) -> Result { + let (mut days, millis) = IntervalDayTimeType::to_parts(rhs); + let mut nanos = (millis as i64) * 1_000_000; + if commute { + lhs *= sign; + } else { + days *= sign; + nanos *= sign as i64; + }; + Ok(ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(lhs, days, nanos), + ))) +} + +/// This function adds/subtracts two "raw" intervals (`lhs` and `rhs`) of different +/// types ([`IntervalYearMonthType`] and [`IntervalMonthDayNanoType`], respectively). +/// The argument `sign` chooses between addition and subtraction, the argument +/// `commute` swaps `lhs` and `rhs`. The return value is an interval [`ScalarValue`] +/// with type data type [`IntervalMonthDayNanoType`]. +#[inline] +fn op_ym_mdn(lhs: i32, rhs: i128, sign: i32, commute: bool) -> Result { + let (mut months, mut days, mut nanos) = IntervalMonthDayNanoType::to_parts(rhs); + if commute { + months += lhs * sign; + } else { + months = lhs + (months * sign); + days *= sign; + nanos *= sign as i64; + } + Ok(ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(months, days, nanos), + ))) +} + +/// This function adds/subtracts two "raw" intervals (`lhs` and `rhs`) of different +/// types ([`IntervalDayTimeType`] and [`IntervalMonthDayNanoType`], respectively). +/// The argument `sign` chooses between addition and subtraction, the argument +/// `commute` swaps `lhs` and `rhs`. The return value is an interval [`ScalarValue`] +/// with type data type [`IntervalMonthDayNanoType`]. +#[inline] +fn op_dt_mdn(lhs: i64, rhs: i128, sign: i32, commute: bool) -> Result { + let (lhs_days, lhs_millis) = IntervalDayTimeType::to_parts(lhs); + let (rhs_months, rhs_days, rhs_nanos) = IntervalMonthDayNanoType::to_parts(rhs); + + let result = if commute { + IntervalMonthDayNanoType::make_value( + rhs_months, + lhs_days * sign + rhs_days, + (lhs_millis * sign) as i64 * 1_000_000 + rhs_nanos, + ) + } else { + IntervalMonthDayNanoType::make_value( + rhs_months * sign, + lhs_days + rhs_days * sign, + (lhs_millis as i64) * 1_000_000 + rhs_nanos * (sign as i64), + ) + }; + Ok(ScalarValue::IntervalMonthDayNano(Some(result))) +} + macro_rules! get_sign { (+) => { 1 @@ -655,7 +870,6 @@ fn ts_sub_to_interval( match mode { IntervalMode::Milli => { - const MILLISECS_IN_ONE_DAY: i64 = 86_400_000; let as_millisecs = delta_secs.num_milliseconds(); Ok(ScalarValue::IntervalDayTime(Some( IntervalDayTimeType::make_value( @@ -665,7 +879,6 @@ fn ts_sub_to_interval( ))) } IntervalMode::Nano => { - const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000; let as_nanosecs = delta_secs.num_nanoseconds().ok_or_else(|| { DataFusionError::Execution(String::from( "Can not compute timestamp differences with nanosecond precision", @@ -3883,6 +4096,53 @@ mod tests { ])), None ); + // Different type of intervals can be compared. + assert!( + IntervalYearMonth(Some(IntervalYearMonthType::make_value(1, 2))) + < IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( + 14, 0, 1 + ))), + ); + assert!( + IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 4))) + >= IntervalDayTime(Some(IntervalDayTimeType::make_value(119, 1))) + ); + assert!( + IntervalDayTime(Some(IntervalDayTimeType::make_value(12, 86_399_999))) + >= IntervalDayTime(Some(IntervalDayTimeType::make_value(12, 0))) + ); + assert!( + IntervalYearMonth(Some(IntervalYearMonthType::make_value(2, 12))) + == IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( + 36, 0, 0 + ))), + ); + assert!( + IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 0))) + != IntervalDayTime(Some(IntervalDayTimeType::make_value(0, 1))) + ); + assert!( + IntervalYearMonth(Some(IntervalYearMonthType::make_value(1, 4))) + == IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 16))), + ); + assert!( + IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 3))) + > IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( + 2, + 28, + 999_999_999 + ))), + ); + assert!( + IntervalYearMonth(Some(IntervalYearMonthType::make_value(0, 1))) + > IntervalDayTime(Some(IntervalDayTimeType::make_value(29, 9_999))), + ); + assert!( + IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value(1, 12, 34))) + > IntervalMonthDayNano(Some(IntervalMonthDayNanoType::make_value( + 0, 142, 34 + ))) + ); } #[test] @@ -4663,6 +4923,224 @@ mod tests { } } + #[test] + fn test_scalar_interval_add() { + let cases = [ + ( + ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + 1, 12, + ))), + ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + 1, 12, + ))), + ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + 2, 24, + ))), + ), + ( + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + 1, 999, + ))), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + 1, 999, + ))), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + 2, 1998, + ))), + ), + ( + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(12, 15, 123_456), + )), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(12, 15, 123_456), + )), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(24, 30, 246_912), + )), + ), + ( + ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + 0, 1, + ))), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + 29, 86_390, + ))), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(1, 29, 86_390_000_000), + )), + ), + ( + ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + 0, 1, + ))), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(2, 10, 999_999_999), + )), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(3, 10, 999_999_999), + )), + ), + ( + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + 400, 123_456, + ))), + ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + 1, 1, + ))), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(13, 400, 123_456_000_000), + )), + ), + ( + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + 65, 321, + ))), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(2, 5, 1_000_000), + )), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(2, 70, 322_000_000), + )), + ), + ( + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(12, 15, 123_456), + )), + ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + 2, 0, + ))), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(36, 15, 123_456), + )), + ), + ( + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(12, 15, 100_000), + )), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + 370, 1, + ))), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(12, 385, 1_100_000), + )), + ), + ]; + for (lhs, rhs, expected) in cases.iter() { + let result = lhs.add(rhs).unwrap(); + let result_commute = rhs.add(lhs).unwrap(); + assert_eq!(*expected, result, "lhs:{:?} + rhs:{:?}", lhs, rhs); + assert_eq!(*expected, result_commute, "lhs:{:?} + rhs:{:?}", rhs, lhs); + } + } + + #[test] + fn test_scalar_interval_sub() { + let cases = [ + ( + ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + 1, 12, + ))), + ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + 1, 12, + ))), + ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + 0, 0, + ))), + ), + ( + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + 1, 999, + ))), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + 1, 999, + ))), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(0, 0))), + ), + ( + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(12, 15, 123_456), + )), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(12, 15, 123_456), + )), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(0, 0, 0), + )), + ), + ( + ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + 0, 1, + ))), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + 29, 999_999, + ))), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(1, -29, -999_999_000_000), + )), + ), + ( + ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + 0, 1, + ))), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(2, 10, 999_999_999), + )), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(-1, -10, -999_999_999), + )), + ), + ( + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + 400, 123_456, + ))), + ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + 1, 1, + ))), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(-13, 400, 123_456_000_000), + )), + ), + ( + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + 65, 321, + ))), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(2, 5, 1_000_000), + )), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(-2, 60, 320_000_000), + )), + ), + ( + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(12, 15, 123_456), + )), + ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( + 2, 0, + ))), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(-12, 15, 123_456), + )), + ), + ( + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(12, 15, 100_000), + )), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( + 370, 1, + ))), + ScalarValue::IntervalMonthDayNano(Some( + IntervalMonthDayNanoType::make_value(12, -355, -900_000), + )), + ), + ]; + for (lhs, rhs, expected) in cases.iter() { + let result = lhs.sub(rhs).unwrap(); + assert_eq!(*expected, result, "lhs:{:?} - rhs:{:?}", lhs, rhs); + } + } + #[test] fn timestamp_op_tests() { // positive interval, edge cases @@ -5030,9 +5508,7 @@ mod tests { let mut intervals = vec![]; let mut rng = rand::thread_rng(); const SECS_IN_ONE_DAY: i32 = 86_400; - const MILLISECS_IN_ONE_DAY: i32 = 86_400_000; const MICROSECS_IN_ONE_DAY: i64 = 86_400_000_000; - const NANOSECS_IN_ONE_DAY: i64 = 86_400_000_000_000; for i in 0..vector_size { if i % 4 == 0 { let days = rng.gen_range(0..5000); @@ -5043,7 +5519,7 @@ mod tests { ))) } else if i % 4 == 1 { let days = rng.gen_range(0..5000); - let millisec = rng.gen_range(0..MILLISECS_IN_ONE_DAY); + let millisec = rng.gen_range(0..(MILLISECS_IN_ONE_DAY as i32)); intervals.push(ScalarValue::IntervalDayTime(Some( IntervalDayTimeType::make_value(days, millisec), ))) From 3bf8fd6e1b9b0542bfc5a886acc13a6410b26360 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Tue, 14 Mar 2023 11:26:37 +0300 Subject: [PATCH 18/23] naming tests --- datafusion/common/src/scalar.rs | 93 +++++++++++++++++---------------- 1 file changed, 47 insertions(+), 46 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 3d7ff1314f27..a9e25f1883d3 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -856,7 +856,7 @@ enum IntervalMode { /// - When subtracting timestamps at seconds/milliseconds precision, the output /// interval will have the type [`IntervalDayTimeType`]. /// - When subtracting timestamps at microseconds/nanoseconds precision, the -/// output interval will have the type [`IntervalMonthDayNano`]. +/// output interval will have the type [`IntervalMonthDayNanoType`]. fn ts_sub_to_interval( lhs_ts: i64, rhs_ts: i64, @@ -5195,16 +5195,18 @@ mod tests { ) -> Vec<(ScalarValue, ScalarValue, ScalarValue)> { vec![ ( - // 1st test case + // 1st test case, having the same time but different with timezones + // Since they are timestamps with nanosecond precision, expected type is + // [`IntervalMonthDayNanoType`] ScalarValue::TimestampNanosecond( Some( NaiveDate::from_ymd_opt(2023, 1, 1) .unwrap() - .and_hms_nano_opt(1, 0, 0, 000_000_000) + .and_hms_nano_opt(12, 0, 0, 000_000_000) .unwrap() .timestamp_nanos(), ), - Some("+01:00".to_string()), + Some("+12:00".to_string()), ), ScalarValue::TimestampNanosecond( Some( @@ -5220,7 +5222,7 @@ mod tests { IntervalMonthDayNanoType::make_value(0, 0, 0), )), ), - // 2nd test case + // 2nd test case, january with 31 days plus february with 28 days, with timezone ( ScalarValue::TimestampMicrosecond( Some( @@ -5246,11 +5248,11 @@ mod tests { IntervalMonthDayNanoType::make_value(0, sign * 59, 0), )), ), - // 3rd test case + // 3rd test case, 29-days long february minus previous, year with timezone ( ScalarValue::TimestampMillisecond( Some( - NaiveDate::from_ymd_opt(2023, 2, 11) + NaiveDate::from_ymd_opt(2024, 2, 29) .unwrap() .and_hms_milli_opt(10, 10, 0, 000) .unwrap() @@ -5260,7 +5262,7 @@ mod tests { ), ScalarValue::TimestampMillisecond( Some( - NaiveDate::from_ymd_opt(2023, 1, 1) + NaiveDate::from_ymd_opt(2023, 12, 31) .unwrap() .and_hms_milli_opt(1, 0, 0, 000) .unwrap() @@ -5269,15 +5271,16 @@ mod tests { Some("+01:00".to_string()), ), ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - sign * 41, + sign * 60, 0, ))), ), - // 4th test case + // 4th test case, leap years occur mostly every 4 years, but every 100 years + // we skip a leap year unless the year is divisible by 400, so 31 + 28 = 59 ( ScalarValue::TimestampSecond( Some( - NaiveDate::from_ymd_opt(2023, 3, 1) + NaiveDate::from_ymd_opt(2100, 3, 1) .unwrap() .and_hms_opt(0, 0, 0) .unwrap() @@ -5287,7 +5290,7 @@ mod tests { ), ScalarValue::TimestampSecond( Some( - NaiveDate::from_ymd_opt(2023, 1, 1) + NaiveDate::from_ymd_opt(2100, 1, 1) .unwrap() .and_hms_opt(23, 58, 0) .unwrap() @@ -5300,17 +5303,18 @@ mod tests { 0, ))), ), - // 5th test case + // 5th test case, without timezone positively seemed, but with timezone, + // negative resulting interval ( ScalarValue::TimestampMillisecond( Some( - NaiveDate::from_ymd_opt(2023, 3, 1) + NaiveDate::from_ymd_opt(2023, 1, 1) .unwrap() - .and_hms_milli_opt(23, 58, 0, 250) + .and_hms_milli_opt(6, 00, 0, 000) .unwrap() .timestamp_millis(), ), - Some("+11:59".to_string()), + Some("+06:00".to_string()), ), ScalarValue::TimestampMillisecond( Some( @@ -5320,20 +5324,20 @@ mod tests { .unwrap() .timestamp_millis(), ), - Some("-11:59".to_string()), + Some("-12:00".to_string()), ), ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - sign * 59, - sign * 250, + 0, + sign * -43_200_000, ))), ), - // 6th test case + // 6th test case, no problem before unix epoch beginning ( ScalarValue::TimestampMicrosecond( Some( - NaiveDate::from_ymd_opt(2023, 3, 1) + NaiveDate::from_ymd_opt(1970, 1, 1) .unwrap() - .and_hms_micro_opt(0, 0, 0, 15) + .and_hms_micro_opt(1, 2, 3, 15) .unwrap() .timestamp_micros(), ), @@ -5341,7 +5345,7 @@ mod tests { ), ScalarValue::TimestampMicrosecond( Some( - NaiveDate::from_ymd_opt(2023, 1, 1) + NaiveDate::from_ymd_opt(1969, 1, 1) .unwrap() .and_hms_micro_opt(0, 0, 0, 000_000) .unwrap() @@ -5352,18 +5356,18 @@ mod tests { ScalarValue::IntervalMonthDayNano(Some( IntervalMonthDayNanoType::make_value( 0, - sign * 59, - sign as i64 * 15_000, + 365 * sign, + sign as i64 * 3_723_000_015_000, ), )), ), - // 7th test case + // 7th test case, no problem with big intervals ( ScalarValue::TimestampNanosecond( Some( - NaiveDate::from_ymd_opt(2023, 3, 1) + NaiveDate::from_ymd_opt(2100, 1, 1) .unwrap() - .and_hms_nano_opt(0, 0, 0, 22) + .and_hms_nano_opt(0, 0, 0, 0) .unwrap() .timestamp_nanos(), ), @@ -5371,7 +5375,7 @@ mod tests { ), ScalarValue::TimestampNanosecond( Some( - NaiveDate::from_ymd_opt(2023, 1, 31) + NaiveDate::from_ymd_opt(2000, 1, 1) .unwrap() .and_hms_nano_opt(0, 0, 0, 000_000_000) .unwrap() @@ -5380,14 +5384,14 @@ mod tests { None, ), ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(0, sign * 29, sign as i64 * 22), + IntervalMonthDayNanoType::make_value(0, sign * 36525, 0), )), ), - // 8th test case + // 8th test case, no problem detecting 366-days long years ( ScalarValue::TimestampSecond( Some( - NaiveDate::from_ymd_opt(2023, 3, 1) + NaiveDate::from_ymd_opt(2041, 1, 1) .unwrap() .and_hms_opt(0, 0, 0) .unwrap() @@ -5397,45 +5401,42 @@ mod tests { ), ScalarValue::TimestampSecond( Some( - NaiveDate::from_ymd_opt(2021, 12, 30) + NaiveDate::from_ymd_opt(2040, 1, 1) .unwrap() - .and_hms_opt(0, 0, 30) + .and_hms_opt(0, 0, 0) .unwrap() .timestamp(), ), None, ), ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - sign * 425, - sign * 86370000, + sign * 366, + 0, ))), ), - // 9th test case + // 9th test case, no problem with unrealistic timezones ( ScalarValue::TimestampSecond( Some( - NaiveDate::from_ymd_opt(2023, 12, 1) + NaiveDate::from_ymd_opt(2023, 1, 3) .unwrap() .and_hms_opt(0, 0, 0) .unwrap() .timestamp(), ), - None, + Some("+23:59".to_string()), ), ScalarValue::TimestampSecond( Some( - NaiveDate::from_ymd_opt(1980, 11, 1) + NaiveDate::from_ymd_opt(2023, 1, 1) .unwrap() - .and_hms_opt(0, 0, 0) + .and_hms_opt(0, 2, 0) .unwrap() .timestamp(), ), - None, + Some("-23:59".to_string()), ), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - sign * 15735, - 0, - ))), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(0, 0))), ), ] } From 0f8a7a74536d08404a0a66d4c0f987232762ed53 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Tue, 14 Mar 2023 18:48:57 +0300 Subject: [PATCH 19/23] macro renaming --- datafusion/common/src/scalar.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index a9e25f1883d3..0570cbe851a5 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -553,7 +553,7 @@ macro_rules! unsigned_subtraction_error { macro_rules! impl_op { ($LHS:expr, $RHS:expr, +) => { - impl_op_symmetric!($LHS, $RHS, +) + impl_op_dissociated!($LHS, $RHS, +) }; ($LHS:expr, $RHS:expr, -) => { match ($LHS, $RHS) { @@ -611,12 +611,12 @@ macro_rules! impl_op { tz_rhs, IntervalMode::Nano, ), - _ => impl_op_symmetric!($LHS, $RHS, -) + _ => impl_op_dissociated!($LHS, $RHS, -) } }; } -macro_rules! impl_op_symmetric { +macro_rules! impl_op_dissociated { ($LHS:expr, $RHS:expr, $OPERATION:tt) => { match ($LHS, $RHS) { // Binary operations on arguments with the same type: From cf892fefcc6bbe6cc304a378460f7c22a668b0b4 Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Tue, 14 Mar 2023 19:07:54 +0300 Subject: [PATCH 20/23] renaming macro --- datafusion/common/src/scalar.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 0570cbe851a5..36138c8130d6 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -553,7 +553,7 @@ macro_rules! unsigned_subtraction_error { macro_rules! impl_op { ($LHS:expr, $RHS:expr, +) => { - impl_op_dissociated!($LHS, $RHS, +) + impl_op_arithmetic!($LHS, $RHS, +) }; ($LHS:expr, $RHS:expr, -) => { match ($LHS, $RHS) { @@ -611,12 +611,12 @@ macro_rules! impl_op { tz_rhs, IntervalMode::Nano, ), - _ => impl_op_dissociated!($LHS, $RHS, -) + _ => impl_op_arithmetic!($LHS, $RHS, -) } }; } -macro_rules! impl_op_dissociated { +macro_rules! impl_op_arithmetic { ($LHS:expr, $RHS:expr, $OPERATION:tt) => { match ($LHS, $RHS) { // Binary operations on arguments with the same type: From d91a7850701261adba68ca668982ce66ac9ce33d Mon Sep 17 00:00:00 2001 From: berkaysynnada Date: Fri, 17 Mar 2023 14:24:49 +0300 Subject: [PATCH 21/23] Utilize DateTime parsing timezone --- datafusion-cli/Cargo.lock | 249 ++++++++++++++++++++++++-------- datafusion/common/Cargo.toml | 1 + datafusion/common/src/scalar.rs | 73 ++++++---- 3 files changed, 232 insertions(+), 91 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 84b4ec7101f8..6fe28cabee06 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -74,16 +74,16 @@ checksum = "f410d3907b6b3647b9e7bca4551274b2e3d716aa940afb67b7287257401da921" dependencies = [ "ahash", "arrow-arith", - "arrow-array", - "arrow-buffer", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", "arrow-cast", "arrow-csv", - "arrow-data", + "arrow-data 34.0.0", "arrow-ipc", "arrow-json", "arrow-ord", "arrow-row", - "arrow-schema", + "arrow-schema 34.0.0", "arrow-select", "arrow-string", "comfy-table", @@ -95,10 +95,10 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f87391cf46473c9bc53dab68cb8872c3a81d4dfd1703f1c8aa397dba9880a043" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "chrono", "half", "num", @@ -111,15 +111,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d35d5475e65c57cffba06d0022e3006b677515f99b54af33a7cd54f6cdd4a5b5" dependencies = [ "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "chrono", "half", "hashbrown 0.13.2", "num", ] +[[package]] +name = "arrow-array" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43489bbff475545b78b0e20bde1d22abd6c99e54499839f9e815a2fa5134a51b" +dependencies = [ + "ahash", + "arrow-buffer 35.0.0", + "arrow-data 35.0.0", + "arrow-schema 35.0.0", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.13.2", + "num", +] + [[package]] name = "arrow-buffer" version = "34.0.0" @@ -130,16 +147,26 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-buffer" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3759e4a52c593281184787af5435671dc8b1e78333e5a30242b2e2d6e3c9d1f" +dependencies = [ + "half", + "num", +] + [[package]] name = "arrow-cast" version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a7285272c9897321dfdba59de29f5b05aeafd3cdedf104a941256d155f6d304" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "arrow-select", "chrono", "lexical-core", @@ -152,11 +179,11 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "981ee4e7f6a120da04e00d0b39182e1eeacccb59c8da74511de753c56b7fddf7" dependencies = [ - "arrow-array", - "arrow-buffer", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "chrono", "csv", "csv-core", @@ -171,8 +198,20 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27cc673ee6989ea6e4b4e8c7d461f7e06026a096c8f0b1a7288885ff71ae1e56" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 34.0.0", + "arrow-schema 34.0.0", + "half", + "num", +] + +[[package]] +name = "arrow-data" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19c7787c6cdbf9539b1ffb860bfc18c5848926ec3d62cbd52dc3b1ea35c874fd" +dependencies = [ + "arrow-buffer 35.0.0", + "arrow-schema 35.0.0", "half", "num", ] @@ -183,11 +222,11 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e37b8b69d9e59116b6b538e8514e0ec63a30f08b617ce800d31cb44e3ef64c1a" dependencies = [ - "arrow-array", - "arrow-buffer", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "flatbuffers", ] @@ -197,11 +236,11 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80c3fa0bed7cfebf6d18e46b733f9cb8a1cb43ce8e6539055ca3e1e48a426266" dependencies = [ - "arrow-array", - "arrow-buffer", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "chrono", "half", "indexmap", @@ -216,10 +255,10 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d247dce7bed6a8d6a3c6debfa707a3a2f694383f0c692a39d736a593eae5ef94" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "arrow-select", "num", ] @@ -231,10 +270,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d609c0181f963cea5c70fddf9a388595b5be441f3aa1d1cdbf728ca834bbd3a" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "half", "hashbrown 0.13.2", ] @@ -245,16 +284,22 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64951898473bfb8e22293e83a44f02874d2257514d49cd95f9aa4afcff183fbc" +[[package]] +name = "arrow-schema" +version = "35.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf6b26f6a6f8410e3b9531cbd1886399b99842701da77d4b4cf2013f7708f20f" + [[package]] name = "arrow-select" version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a513d89c2e1ac22b28380900036cf1f3992c6443efc5e079de631dcf83c6888" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "num", ] @@ -264,10 +309,10 @@ version = "34.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5288979b2705dae1114c864d73150629add9153b9b8f1d7ee3963db94c372ba5" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", + "arrow-data 34.0.0", + "arrow-schema 34.0.0", "arrow-select", "regex", "regex-syntax", @@ -451,6 +496,28 @@ dependencies = [ "winapi", ] +[[package]] +name = "chrono-tz" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa48fa079165080f11d7753fd0bc175b7d391f276b965fe4b55bfad67856e463" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf", +] + +[[package]] +name = "chrono-tz-build" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9998fb9f7e9b2111641485bf8beb32f92945f97f92a3d061f744cfef335f751" +dependencies = [ + "parse-zoneinfo", + "phf", + "phf_codegen", +] + [[package]] name = "clap" version = "3.2.23" @@ -737,6 +804,7 @@ name = "datafusion-common" version = "20.0.0" dependencies = [ "arrow", + "arrow-array 35.0.0", "chrono", "num_cpus", "object_store", @@ -792,8 +860,8 @@ version = "20.0.0" dependencies = [ "ahash", "arrow", - "arrow-buffer", - "arrow-schema", + "arrow-buffer 34.0.0", + "arrow-schema 34.0.0", "blake2", "blake3", "chrono", @@ -829,7 +897,7 @@ dependencies = [ name = "datafusion-sql" version = "20.0.0" dependencies = [ - "arrow-schema", + "arrow-schema 34.0.0", "datafusion-common", "datafusion-expr", "log", @@ -1204,6 +1272,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" + [[package]] name = "http" version = "0.2.9" @@ -1342,10 +1416,11 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "io-lifetimes" -version = "1.0.6" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfa919a82ea574332e2de6e74b4c36e74d41982b335080fa59d4ef31be20fdf3" +checksum = "76e86b86ae312accbf05ade23ce76b625e0e47a255712b7414037385a1c05380" dependencies = [ + "hermit-abi 0.3.1", "libc", "windows-sys 0.45.0", ] @@ -1784,12 +1859,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ac135ecf63ebb5f53dda0921b0b76d6048b3ef631a5f4760b9e8f863ff00cfa" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", + "arrow-array 34.0.0", + "arrow-buffer 34.0.0", "arrow-cast", - "arrow-data", + "arrow-data 34.0.0", "arrow-ipc", - "arrow-schema", + "arrow-schema 34.0.0", "arrow-select", "base64", "brotli", @@ -1810,6 +1885,15 @@ dependencies = [ "zstd 0.12.3+zstd.1.5.2", ] +[[package]] +name = "parse-zoneinfo" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c705f256449c60da65e11ff6626e0c16a0a0b96aaa348de61376b249bc340f41" +dependencies = [ + "regex", +] + [[package]] name = "paste" version = "1.0.12" @@ -1832,6 +1916,44 @@ dependencies = [ "indexmap", ] +[[package]] +name = "phf" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a56ac890c5e3ca598bbdeaa99964edb5b0258a583a9eb6ef4e89fc85d9224770" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project-lite" version = "0.2.9" @@ -2171,18 +2293,18 @@ checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc" [[package]] name = "serde" -version = "1.0.155" +version = "1.0.156" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71f2b4817415c6d4210bfe1c7bfcf4801b2d904cb4d0e1a8fdb651013c9e86b8" +checksum = "314b5b092c0ade17c00142951e50ced110ec27cea304b1037c6969246c2469a4" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.155" +version = "1.0.156" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d071a94a3fac4aff69d023a7f411e33f40f3483f8c5190b1953822b6b76d7630" +checksum = "d7e29c4601e36bcec74a223228dce795f4cd3616341a4af93520ca1a837c087d" dependencies = [ "proc-macro2", "quote", @@ -2223,6 +2345,12 @@ dependencies = [ "digest", ] +[[package]] +name = "siphasher" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" + [[package]] name = "slab" version = "0.4.8" @@ -2639,12 +2767,11 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "walkdir" -version = "2.3.2" +version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" dependencies = [ "same-file", - "winapi", "winapi-util", ] diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 444ce9a2e0ae..7d78ed70eb35 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -41,6 +41,7 @@ pyarrow = ["pyo3", "arrow/pyarrow"] [dependencies] apache-avro = { version = "0.14", default-features = false, features = ["snappy"], optional = true } arrow = { workspace = true, default-features = false } +arrow-array = { version = "35.0.0", default-features = false, features = ["chrono-tz"] } chrono = { version = "0.4", default-features = false } cranelift-module = { version = "0.92.0", optional = true } num_cpus = "1.13.0" diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index f7455237e87b..5c620f27177e 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -43,7 +43,8 @@ use arrow::{ DECIMAL128_MAX_PRECISION, }, }; -use chrono::{DateTime, Datelike, Duration, FixedOffset, NaiveDate, NaiveDateTime}; +use arrow_array::timezone::Tz; +use chrono::{DateTime, Datelike, Duration, NaiveDate, NaiveDateTime, TimeZone}; // Constants we use throughout this file: const MILLISECS_IN_ONE_DAY: i64 = 86_400_000; @@ -902,17 +903,28 @@ fn with_timezone_to_naive_datetime( ts: i64, tz: &Option, mode: IntervalMode, -) -> Result { - let mut result = if let IntervalMode::Milli = mode { +) -> Result { + let datetime = if let IntervalMode::Milli = mode { ticks_to_naive_datetime::<1_000_000>(ts) } else { ticks_to_naive_datetime::<1>(ts) }?; + if let Some(tz) = tz { - let offset = parse_tz_to_offset(tz)?; - result = DateTime::::from_utc(result, offset).naive_local(); - }; - Ok(result) + let parsed_tz: Tz = FromStr::from_str(tz).map_err(|_| { + DataFusionError::Execution("cannot parse given timezone".to_string()) + })?; + let offset = parsed_tz + .offset_from_local_datetime(&datetime) + .single() + .ok_or_else(|| { + DataFusionError::Execution( + "error conversion result of timezone offset".to_string(), + ) + })?; + return Ok(DateTime::::from_local(datetime, offset).naive_utc()); + } + Ok(datetime) } /// This function creates the [`NaiveDateTime`] object corresponding to the @@ -930,29 +942,6 @@ fn ticks_to_naive_datetime(ticks: i64) -> Result Result { - const ERR_MSG: &str = "Can not parse timezone"; - let sign = tz - .chars() - .next() - .ok_or_else(|| DataFusionError::Execution(ERR_MSG.to_string()))?; - let hours = tz[1..3] - .parse::() - .map_err(|_| DataFusionError::Execution(ERR_MSG.to_string()))?; - let minutes = tz[4..6] - .parse::() - .map_err(|_| DataFusionError::Execution(ERR_MSG.to_string()))?; - match sign { - '-' => FixedOffset::east_opt(hours * 3600 + minutes * 60), - '+' => FixedOffset::west_opt(hours * 3600 + minutes * 60), - _ => None, - } - .ok_or_else(|| DataFusionError::Execution(ERR_MSG.to_string())) -} - #[inline] pub fn date32_add(days: i32, scalar: &ScalarValue, sign: i32) -> Result { let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); @@ -5438,6 +5427,30 @@ mod tests { ), ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(0, 0))), ), + // 10th test case, parsing different types of timezone input + ( + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 17) + .unwrap() + .and_hms_opt(14, 10, 0) + .unwrap() + .timestamp(), + ), + Some("Europe/Istanbul".to_string()), + ), + ScalarValue::TimestampSecond( + Some( + NaiveDate::from_ymd_opt(2023, 3, 17) + .unwrap() + .and_hms_opt(4, 10, 0) + .unwrap() + .timestamp(), + ), + Some("America/Los_Angeles".to_string()), + ), + ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(0, 0))), + ), ] } From f8ba64c4840f370be1856625bf93c99d8229136b Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Fri, 17 Mar 2023 16:22:44 -0500 Subject: [PATCH 22/23] Get rid of boilerplate by using convenience functions --- datafusion/common/src/scalar.rs | 214 ++++++++------------------------ 1 file changed, 54 insertions(+), 160 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 5c620f27177e..3ceb41db0845 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -4916,103 +4916,49 @@ mod tests { fn test_scalar_interval_add() { let cases = [ ( - ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( - 1, 12, - ))), - ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( - 1, 12, - ))), - ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( - 2, 24, - ))), + ScalarValue::new_interval_ym(1, 12), + ScalarValue::new_interval_ym(1, 12), + ScalarValue::new_interval_ym(2, 24), ), ( - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - 1, 999, - ))), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - 1, 999, - ))), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - 2, 1998, - ))), + ScalarValue::new_interval_dt(1, 999), + ScalarValue::new_interval_dt(1, 999), + ScalarValue::new_interval_dt(2, 1998), ), ( - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(12, 15, 123_456), - )), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(12, 15, 123_456), - )), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(24, 30, 246_912), - )), + ScalarValue::new_interval_mdn(12, 15, 123_456), + ScalarValue::new_interval_mdn(12, 15, 123_456), + ScalarValue::new_interval_mdn(24, 30, 246_912), ), ( - ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( - 0, 1, - ))), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - 29, 86_390, - ))), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(1, 29, 86_390_000_000), - )), + ScalarValue::new_interval_ym(0, 1), + ScalarValue::new_interval_dt(29, 86_390), + ScalarValue::new_interval_mdn(1, 29, 86_390_000_000), ), ( - ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( - 0, 1, - ))), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(2, 10, 999_999_999), - )), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(3, 10, 999_999_999), - )), + ScalarValue::new_interval_ym(0, 1), + ScalarValue::new_interval_mdn(2, 10, 999_999_999), + ScalarValue::new_interval_mdn(3, 10, 999_999_999), ), ( - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - 400, 123_456, - ))), - ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( - 1, 1, - ))), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(13, 400, 123_456_000_000), - )), + ScalarValue::new_interval_dt(400, 123_456), + ScalarValue::new_interval_ym(1, 1), + ScalarValue::new_interval_mdn(13, 400, 123_456_000_000), ), ( - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - 65, 321, - ))), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(2, 5, 1_000_000), - )), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(2, 70, 322_000_000), - )), + ScalarValue::new_interval_dt(65, 321), + ScalarValue::new_interval_mdn(2, 5, 1_000_000), + ScalarValue::new_interval_mdn(2, 70, 322_000_000), ), ( - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(12, 15, 123_456), - )), - ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( - 2, 0, - ))), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(36, 15, 123_456), - )), + ScalarValue::new_interval_mdn(12, 15, 123_456), + ScalarValue::new_interval_ym(2, 0), + ScalarValue::new_interval_mdn(36, 15, 123_456), ), ( - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(12, 15, 100_000), - )), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - 370, 1, - ))), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(12, 385, 1_100_000), - )), + ScalarValue::new_interval_mdn(12, 15, 100_000), + ScalarValue::new_interval_dt(370, 1), + ScalarValue::new_interval_mdn(12, 385, 1_100_000), ), ]; for (lhs, rhs, expected) in cases.iter() { @@ -5027,101 +4973,49 @@ mod tests { fn test_scalar_interval_sub() { let cases = [ ( - ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( - 1, 12, - ))), - ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( - 1, 12, - ))), - ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( - 0, 0, - ))), + ScalarValue::new_interval_ym(1, 12), + ScalarValue::new_interval_ym(1, 12), + ScalarValue::new_interval_ym(0, 0), ), ( - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - 1, 999, - ))), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - 1, 999, - ))), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(0, 0))), + ScalarValue::new_interval_dt(1, 999), + ScalarValue::new_interval_dt(1, 999), + ScalarValue::new_interval_dt(0, 0), ), ( - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(12, 15, 123_456), - )), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(12, 15, 123_456), - )), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(0, 0, 0), - )), + ScalarValue::new_interval_mdn(12, 15, 123_456), + ScalarValue::new_interval_mdn(12, 15, 123_456), + ScalarValue::new_interval_mdn(0, 0, 0), ), ( - ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( - 0, 1, - ))), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - 29, 999_999, - ))), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(1, -29, -999_999_000_000), - )), + ScalarValue::new_interval_ym(0, 1), + ScalarValue::new_interval_dt(29, 999_999), + ScalarValue::new_interval_mdn(1, -29, -999_999_000_000), ), ( - ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( - 0, 1, - ))), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(2, 10, 999_999_999), - )), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(-1, -10, -999_999_999), - )), + ScalarValue::new_interval_ym(0, 1), + ScalarValue::new_interval_mdn(2, 10, 999_999_999), + ScalarValue::new_interval_mdn(-1, -10, -999_999_999), ), ( - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - 400, 123_456, - ))), - ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( - 1, 1, - ))), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(-13, 400, 123_456_000_000), - )), + ScalarValue::new_interval_dt(400, 123_456), + ScalarValue::new_interval_ym(1, 1), + ScalarValue::new_interval_mdn(-13, 400, 123_456_000_000), ), ( - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - 65, 321, - ))), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(2, 5, 1_000_000), - )), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(-2, 60, 320_000_000), - )), + ScalarValue::new_interval_dt(65, 321), + ScalarValue::new_interval_mdn(2, 5, 1_000_000), + ScalarValue::new_interval_mdn(-2, 60, 320_000_000), ), ( - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(12, 15, 123_456), - )), - ScalarValue::IntervalYearMonth(Some(IntervalYearMonthType::make_value( - 2, 0, - ))), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(-12, 15, 123_456), - )), + ScalarValue::new_interval_mdn(12, 15, 123_456), + ScalarValue::new_interval_ym(2, 0), + ScalarValue::new_interval_mdn(-12, 15, 123_456), ), ( - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(12, 15, 100_000), - )), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - 370, 1, - ))), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(12, -355, -900_000), - )), + ScalarValue::new_interval_mdn(12, 15, 100_000), + ScalarValue::new_interval_dt(370, 1), + ScalarValue::new_interval_mdn(12, -355, -900_000), ), ]; for (lhs, rhs, expected) in cases.iter() { From 737e22c7e51832319d3c7929820c45df0afbf347 Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Fri, 17 Mar 2023 16:46:35 -0500 Subject: [PATCH 23/23] Get rid of boilerplate by using convenience functions (part 2) --- datafusion/common/src/scalar.rs | 123 +++++++++++--------------------- 1 file changed, 42 insertions(+), 81 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 3ceb41db0845..92cdab3ebba3 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -660,9 +660,10 @@ macro_rules! impl_op_arithmetic { ( ScalarValue::IntervalYearMonth(Some(lhs)), ScalarValue::IntervalYearMonth(Some(rhs)), - ) => Ok(ScalarValue::IntervalYearMonth(Some( - IntervalYearMonthType::make_value(0, lhs + rhs * get_sign!($OPERATION)), - ))), + ) => Ok(ScalarValue::new_interval_ym( + 0, + lhs + rhs * get_sign!($OPERATION), + )), ( ScalarValue::IntervalDayTime(Some(lhs)), ScalarValue::IntervalDayTime(Some(rhs)), @@ -670,12 +671,10 @@ macro_rules! impl_op_arithmetic { let sign = get_sign!($OPERATION); let (lhs_days, lhs_millis) = IntervalDayTimeType::to_parts(*lhs); let (rhs_days, rhs_millis) = IntervalDayTimeType::to_parts(*rhs); - Ok(ScalarValue::IntervalDayTime(Some( - IntervalDayTimeType::make_value( - lhs_days + rhs_days * sign, - lhs_millis + rhs_millis * sign, - ), - ))) + Ok(ScalarValue::new_interval_dt( + lhs_days + rhs_days * sign, + lhs_millis + rhs_millis * sign, + )) } ( ScalarValue::IntervalMonthDayNano(Some(lhs)), @@ -686,13 +685,11 @@ macro_rules! impl_op_arithmetic { IntervalMonthDayNanoType::to_parts(*lhs); let (rhs_months, rhs_days, rhs_nanos) = IntervalMonthDayNanoType::to_parts(*rhs); - Ok(ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value( - lhs_months + rhs_months * sign, - lhs_days + rhs_days * sign, - lhs_nanos + rhs_nanos * (sign as i64), - ), - ))) + Ok(ScalarValue::new_interval_mdn( + lhs_months + rhs_months * sign, + lhs_days + rhs_days * sign, + lhs_nanos + rhs_nanos * (sign as i64), + )) } // Binary operations on arguments with different types: (ScalarValue::Date32(Some(days)), _) => { @@ -784,9 +781,7 @@ fn op_ym_dt(mut lhs: i32, rhs: i64, sign: i32, commute: bool) -> Result Result { let as_millisecs = delta_secs.num_milliseconds(); - Ok(ScalarValue::IntervalDayTime(Some( - IntervalDayTimeType::make_value( - (as_millisecs / MILLISECS_IN_ONE_DAY) as i32, - (as_millisecs % MILLISECS_IN_ONE_DAY) as i32, - ), - ))) + Ok(ScalarValue::new_interval_dt( + (as_millisecs / MILLISECS_IN_ONE_DAY) as i32, + (as_millisecs % MILLISECS_IN_ONE_DAY) as i32, + )) } IntervalMode::Nano => { let as_nanosecs = delta_secs.num_nanoseconds().ok_or_else(|| { @@ -885,13 +876,11 @@ fn ts_sub_to_interval( "Can not compute timestamp differences with nanosecond precision", )) })?; - Ok(ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value( - 0, - (as_nanosecs / NANOSECS_IN_ONE_DAY) as i32, - as_nanosecs % NANOSECS_IN_ONE_DAY, - ), - ))) + Ok(ScalarValue::new_interval_mdn( + 0, + (as_nanosecs / NANOSECS_IN_ONE_DAY) as i32, + as_nanosecs % NANOSECS_IN_ONE_DAY, + )) } } } @@ -5101,9 +5090,7 @@ mod tests { ), Some("+00:00".to_string()), ), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(0, 0, 0), - )), + ScalarValue::new_interval_mdn(0, 0, 0), ), // 2nd test case, january with 31 days plus february with 28 days, with timezone ( @@ -5127,9 +5114,7 @@ mod tests { ), Some("-01:00".to_string()), ), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(0, sign * 59, 0), - )), + ScalarValue::new_interval_mdn(0, sign * 59, 0), ), // 3rd test case, 29-days long february minus previous, year with timezone ( @@ -5153,10 +5138,7 @@ mod tests { ), Some("+01:00".to_string()), ), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - sign * 60, - 0, - ))), + ScalarValue::new_interval_dt(sign * 60, 0), ), // 4th test case, leap years occur mostly every 4 years, but every 100 years // we skip a leap year unless the year is divisible by 400, so 31 + 28 = 59 @@ -5181,10 +5163,7 @@ mod tests { ), Some("+11:59".to_string()), ), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - sign * 59, - 0, - ))), + ScalarValue::new_interval_dt(sign * 59, 0), ), // 5th test case, without timezone positively seemed, but with timezone, // negative resulting interval @@ -5209,10 +5188,7 @@ mod tests { ), Some("-12:00".to_string()), ), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - 0, - sign * -43_200_000, - ))), + ScalarValue::new_interval_dt(0, sign * -43_200_000), ), // 6th test case, no problem before unix epoch beginning ( @@ -5236,13 +5212,11 @@ mod tests { ), None, ), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value( - 0, - 365 * sign, - sign as i64 * 3_723_000_015_000, - ), - )), + ScalarValue::new_interval_mdn( + 0, + 365 * sign, + sign as i64 * 3_723_000_015_000, + ), ), // 7th test case, no problem with big intervals ( @@ -5266,9 +5240,7 @@ mod tests { ), None, ), - ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(0, sign * 36525, 0), - )), + ScalarValue::new_interval_mdn(0, sign * 36525, 0), ), // 8th test case, no problem detecting 366-days long years ( @@ -5292,10 +5264,7 @@ mod tests { ), None, ), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value( - sign * 366, - 0, - ))), + ScalarValue::new_interval_dt(sign * 366, 0), ), // 9th test case, no problem with unrealistic timezones ( @@ -5319,7 +5288,7 @@ mod tests { ), Some("-23:59".to_string()), ), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(0, 0))), + ScalarValue::new_interval_dt(0, 0), ), // 10th test case, parsing different types of timezone input ( @@ -5343,7 +5312,7 @@ mod tests { ), Some("America/Los_Angeles".to_string()), ), - ScalarValue::IntervalDayTime(Some(IntervalDayTimeType::make_value(0, 0))), + ScalarValue::new_interval_dt(0, 0), ), ] } @@ -5422,28 +5391,20 @@ mod tests { let days = rng.gen_range(0..5000); // to not break second precision let millis = rng.gen_range(0..SECS_IN_ONE_DAY) * 1000; - intervals.push(ScalarValue::IntervalDayTime(Some( - IntervalDayTimeType::make_value(days, millis), - ))) + intervals.push(ScalarValue::new_interval_dt(days, millis)); } else if i % 4 == 1 { let days = rng.gen_range(0..5000); let millisec = rng.gen_range(0..(MILLISECS_IN_ONE_DAY as i32)); - intervals.push(ScalarValue::IntervalDayTime(Some( - IntervalDayTimeType::make_value(days, millisec), - ))) + intervals.push(ScalarValue::new_interval_dt(days, millisec)); } else if i % 4 == 2 { let days = rng.gen_range(0..5000); // to not break microsec precision let nanosec = rng.gen_range(0..MICROSECS_IN_ONE_DAY) * 1000; - intervals.push(ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(0, days, nanosec), - ))) + intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec)); } else { let days = rng.gen_range(0..5000); let nanosec = rng.gen_range(0..NANOSECS_IN_ONE_DAY); - intervals.push(ScalarValue::IntervalMonthDayNano(Some( - IntervalMonthDayNanoType::make_value(0, days, nanosec), - ))); + intervals.push(ScalarValue::new_interval_mdn(0, days, nanosec)); } } intervals