From 68a89b3404dfad0c2c7e704a1d5910f88d4eec7c Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Mon, 3 Jul 2023 18:09:08 +0100 Subject: [PATCH] Datum based arithmetic --- Cargo.toml | 12 +- datafusion-cli/Cargo.lock | 45 +- datafusion-cli/Cargo.toml | 7 + datafusion/common/src/scalar.rs | 151 +- datafusion/core/tests/sql/timestamp.rs | 18 +- datafusion/core/tests/sql/udf.rs | 2 +- .../sqllogictests/test_files/arrow_typeof.slt | 4 +- .../tests/sqllogictests/test_files/dates.slt | 6 +- .../sqllogictests/test_files/decimal.slt | 24 +- .../sqllogictests/test_files/interval.slt | 32 +- .../sqllogictests/test_files/timestamps.slt | 16 +- .../test_files/type_coercion.slt | 8 +- datafusion/expr/src/type_coercion/binary.rs | 259 +-- .../physical-expr/src/expressions/binary.rs | 331 +-- .../src/expressions/binary/kernels_arrow.rs | 2033 +---------------- .../physical-expr/src/expressions/datetime.rs | 931 -------- .../physical-expr/src/expressions/mod.rs | 2 - .../physical-expr/src/intervals/test_utils.rs | 22 +- datafusion/physical-expr/src/planner.rs | 53 +- datafusion/proto/proto/datafusion.proto | 5 +- datafusion/proto/src/generated/pbjson.rs | 14 - datafusion/proto/src/generated/prost.rs | 14 +- .../proto/src/physical_plan/from_proto.rs | 20 +- datafusion/proto/src/physical_plan/mod.rs | 6 +- .../proto/src/physical_plan/to_proto.rs | 16 +- 25 files changed, 270 insertions(+), 3761 deletions(-) delete mode 100644 datafusion/physical-expr/src/expressions/datetime.rs diff --git a/Cargo.toml b/Cargo.toml index 1e493f864c03..c90b43f27e8b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,10 +47,10 @@ rust-version = "1.64" [workspace.dependencies] arrow = { version = "43.0.0", features = ["prettyprint", "dyn_cmp_dict"] } -arrow-flight = { version = "43.0.0", features = ["flight-sql-experimental"] } +arrow-array = { version = "43.0.0", default-features = false, features = ["chrono-tz"] } arrow-buffer = { version = "43.0.0", default-features = false } +arrow-flight = { 
version = "43.0.0", features = ["flight-sql-experimental"] } arrow-schema = { version = "43.0.0", default-features = false } -arrow-array = { version = "43.0.0", default-features = false, features = ["chrono-tz"] } parquet = { version = "43.0.0", features = ["arrow", "async", "object_store"] } sqlparser = { version = "0.35", features = ["visitor"] } @@ -71,3 +71,11 @@ opt-level = 3 overflow-checks = false panic = 'unwind' rpath = false + +[patch.crates-io] +arrow = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" } +arrow-array = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" } +arrow-buffer = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" } +arrow-flight = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" } +arrow-schema = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" } +parquet = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index b04b81cbec6f..cec914a4d52f 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -87,8 +87,7 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2feeebd77b34b0bc88f224e06d01c27da4733997cc4789a4e056196656cdc59a" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=9c461f7027871b3d1a1b30de7fd26b3ac01cb096#9c461f7027871b3d1a1b30de7fd26b3ac01cb096" dependencies = [ "ahash", "arrow-arith", @@ -109,8 +108,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7173f5dc49c0ecb5135f52565af33afd3fdc9a12d13bd6f9973e8b96305e4b2e" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=9c461f7027871b3d1a1b30de7fd26b3ac01cb096#9c461f7027871b3d1a1b30de7fd26b3ac01cb096" dependencies = [ "arrow-array", "arrow-buffer", @@ -124,8 +122,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63d7ea725f7d1f8bb2cffc53ef538557e95fc802e217d5be25122d402e22f3d0" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=9c461f7027871b3d1a1b30de7fd26b3ac01cb096#9c461f7027871b3d1a1b30de7fd26b3ac01cb096" dependencies = [ "ahash", "arrow-buffer", @@ -141,8 +138,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdbe439e077f484e5000b9e1d47b5e4c0d15f2b311a8f5bcc682553d5d67a722" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=9c461f7027871b3d1a1b30de7fd26b3ac01cb096#9c461f7027871b3d1a1b30de7fd26b3ac01cb096" dependencies = [ "half", "num", @@ -151,8 +147,7 @@ dependencies = [ [[package]] name = "arrow-cast" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93913cc14875770aa1eef5e310765e855effa352c094cb1c7c00607d0f37b4e1" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=9c461f7027871b3d1a1b30de7fd26b3ac01cb096#9c461f7027871b3d1a1b30de7fd26b3ac01cb096" dependencies = [ "arrow-array", "arrow-buffer", @@ -169,8 +164,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef55b67c55ed877e6fe7b923121c19dae5e31ca70249ea2779a17b58fb0fbd9a" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=9c461f7027871b3d1a1b30de7fd26b3ac01cb096#9c461f7027871b3d1a1b30de7fd26b3ac01cb096" dependencies = [ "arrow-array", "arrow-buffer", @@ -188,8 +182,7 @@ dependencies = [ [[package]] name = 
"arrow-data" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4f4f4a3c54614126a71ab91f6631c9743eb4643d6e9318b74191da9dc6e028b" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=9c461f7027871b3d1a1b30de7fd26b3ac01cb096#9c461f7027871b3d1a1b30de7fd26b3ac01cb096" dependencies = [ "arrow-buffer", "arrow-schema", @@ -200,8 +193,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d41a3659f984a524ef1c2981d43747b24d8eec78e2425267fcd0ef34ce71cd18" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=9c461f7027871b3d1a1b30de7fd26b3ac01cb096#9c461f7027871b3d1a1b30de7fd26b3ac01cb096" dependencies = [ "arrow-array", "arrow-buffer", @@ -214,8 +206,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10b95faa95a378f56ef32d84cc0104ea998c39ef7cd1faaa6b4cebf8ea92846d" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=9c461f7027871b3d1a1b30de7fd26b3ac01cb096#9c461f7027871b3d1a1b30de7fd26b3ac01cb096" dependencies = [ "arrow-array", "arrow-buffer", @@ -234,8 +225,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c68549a4284d9f8b39586afb8d5ff8158b8f0286353a4844deb1d11cf1ba1f26" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=9c461f7027871b3d1a1b30de7fd26b3ac01cb096#9c461f7027871b3d1a1b30de7fd26b3ac01cb096" dependencies = [ "arrow-array", "arrow-buffer", @@ -249,8 +239,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a75a4a757afc301ce010adadff54d79d66140c4282ed3de565f6ccb716a5cf3" +source = 
"git+https://github.com/tustvold/arrow-rs.git?rev=9c461f7027871b3d1a1b30de7fd26b3ac01cb096#9c461f7027871b3d1a1b30de7fd26b3ac01cb096" dependencies = [ "ahash", "arrow-array", @@ -264,14 +253,12 @@ dependencies = [ [[package]] name = "arrow-schema" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bebcb57eef570b15afbcf2d07d813eb476fde9f6dd69c81004d6476c197e87e" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=9c461f7027871b3d1a1b30de7fd26b3ac01cb096#9c461f7027871b3d1a1b30de7fd26b3ac01cb096" [[package]] name = "arrow-select" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6e2943fa433a48921e914417173816af64eef61c0a3d448280e6c40a62df221" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=9c461f7027871b3d1a1b30de7fd26b3ac01cb096#9c461f7027871b3d1a1b30de7fd26b3ac01cb096" dependencies = [ "arrow-array", "arrow-buffer", @@ -283,8 +270,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbc92ed638851774f6d7af1ad900b92bc1486746497511868b4298fcbcfa35af" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=9c461f7027871b3d1a1b30de7fd26b3ac01cb096#9c461f7027871b3d1a1b30de7fd26b3ac01cb096" dependencies = [ "arrow-array", "arrow-buffer", @@ -2233,8 +2219,7 @@ dependencies = [ [[package]] name = "parquet" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec7267a9607c3f955d4d0ac41b88a67cecc0d8d009173ad3da390699a6cb3750" +source = "git+https://github.com/tustvold/arrow-rs.git?rev=9c461f7027871b3d1a1b30de7fd26b3ac01cb096#9c461f7027871b3d1a1b30de7fd26b3ac01cb096" dependencies = [ "ahash", "arrow-array", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index c424e5ae1a4c..5e7464510a7f 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -49,3 +49,10 @@ assert_cmd = "2.0" ctor 
= "0.2.0" predicates = "3.0" rstest = "0.17" + +[patch.crates-io] +arrow = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" } +arrow-array = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" } +arrow-buffer = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" } +arrow-schema = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" } +parquet = { git = "https://github.com/tustvold/arrow-rs.git", rev = "9c461f7027871b3d1a1b30de7fd26b3ac01cb096" } diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 4fef60020f77..112cb7a3a26c 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -46,7 +46,7 @@ use arrow::{ DECIMAL128_MAX_PRECISION, }, }; -use arrow_array::timezone::Tz; +use arrow_array::{timezone::Tz, ArrowNativeTypeOp}; use chrono::{Datelike, Duration, NaiveDate, NaiveDateTime}; // Constants we use throughout this file: @@ -743,55 +743,21 @@ macro_rules! 
impl_op { ($LHS:expr, $RHS:expr, -) => { match ($LHS, $RHS) { ( - ScalarValue::TimestampSecond(Some(ts_lhs), tz_lhs), - ScalarValue::TimestampSecond(Some(ts_rhs), tz_rhs), - ) => { - let err = || { - DataFusionError::Execution( - "Overflow while converting seconds to milliseconds".to_string(), - ) - }; - ts_sub_to_interval::( - ts_lhs.checked_mul(1_000).ok_or_else(err)?, - ts_rhs.checked_mul(1_000).ok_or_else(err)?, - tz_lhs.as_deref(), - tz_rhs.as_deref(), - ) - }, + ScalarValue::TimestampSecond(Some(ts_lhs), _), + ScalarValue::TimestampSecond(Some(ts_rhs), _), + ) => Ok(ScalarValue::DurationSecond(Some(ts_lhs.sub_checked(*ts_rhs)?))), ( - ScalarValue::TimestampMillisecond(Some(ts_lhs), tz_lhs), - ScalarValue::TimestampMillisecond(Some(ts_rhs), tz_rhs), - ) => ts_sub_to_interval::( - *ts_lhs, - *ts_rhs, - tz_lhs.as_deref(), - tz_rhs.as_deref(), - ), + ScalarValue::TimestampMillisecond(Some(ts_lhs), _), + ScalarValue::TimestampMillisecond(Some(ts_rhs), _), + ) => Ok(ScalarValue::DurationMillisecond(Some(ts_lhs.sub_checked(*ts_rhs)?))), ( - ScalarValue::TimestampMicrosecond(Some(ts_lhs), tz_lhs), - ScalarValue::TimestampMicrosecond(Some(ts_rhs), tz_rhs), - ) => { - let err = || { - DataFusionError::Execution( - "Overflow while converting microseconds to nanoseconds".to_string(), - ) - }; - ts_sub_to_interval::( - ts_lhs.checked_mul(1_000).ok_or_else(err)?, - ts_rhs.checked_mul(1_000).ok_or_else(err)?, - tz_lhs.as_deref(), - tz_rhs.as_deref(), - ) - }, + ScalarValue::TimestampMicrosecond(Some(ts_lhs), _), + ScalarValue::TimestampMicrosecond(Some(ts_rhs), _), + ) => Ok(ScalarValue::DurationMicrosecond(Some(ts_lhs.sub_checked(*ts_rhs)?))), ( - ScalarValue::TimestampNanosecond(Some(ts_lhs), tz_lhs), - ScalarValue::TimestampNanosecond(Some(ts_rhs), tz_rhs), - ) => ts_sub_to_interval::( - *ts_lhs, - *ts_rhs, - tz_lhs.as_deref(), - tz_rhs.as_deref(), - ), + ScalarValue::TimestampNanosecond(Some(ts_lhs), _), + ScalarValue::TimestampNanosecond(Some(ts_rhs), _), + ) => 
Ok(ScalarValue::DurationNanosecond(Some(ts_lhs.sub_checked(*ts_rhs)?))), _ => impl_op_arithmetic!($LHS, $RHS, -) } }; @@ -1147,49 +1113,6 @@ pub const MDN_MODE: i8 = 2; pub const MILLISECOND_MODE: bool = false; pub const NANOSECOND_MODE: bool = true; -/// This function computes subtracts `rhs_ts` from `lhs_ts`, taking timezones -/// into account when given. Units of the resulting interval is specified by -/// the constant `TIME_MODE`. -/// The default behavior of Datafusion is the following: -/// - When subtracting timestamps at seconds/milliseconds precision, the output -/// interval will have the type [`IntervalDayTimeType`]. -/// - When subtracting timestamps at microseconds/nanoseconds precision, the -/// output interval will have the type [`IntervalMonthDayNanoType`]. -fn ts_sub_to_interval( - lhs_ts: i64, - rhs_ts: i64, - lhs_tz: Option<&str>, - rhs_tz: Option<&str>, -) -> Result { - let parsed_lhs_tz = parse_timezones(lhs_tz)?; - let parsed_rhs_tz = parse_timezones(rhs_tz)?; - - let (naive_lhs, naive_rhs) = - calculate_naives::(lhs_ts, parsed_lhs_tz, rhs_ts, parsed_rhs_tz)?; - let delta_secs = naive_lhs.signed_duration_since(naive_rhs); - - match TIME_MODE { - MILLISECOND_MODE => { - let as_millisecs = delta_secs.num_milliseconds(); - Ok(ScalarValue::new_interval_dt( - (as_millisecs / MILLISECS_IN_ONE_DAY) as i32, - (as_millisecs % MILLISECS_IN_ONE_DAY) as i32, - )) - } - NANOSECOND_MODE => { - let as_nanosecs = delta_secs.num_nanoseconds().ok_or_else(|| { - DataFusionError::Execution(String::from( - "Can not compute timestamp differences with nanosecond precision", - )) - })?; - Ok(ScalarValue::new_interval_mdn( - 0, - (as_nanosecs / NANOSECS_IN_ONE_DAY) as i32, - as_nanosecs % NANOSECS_IN_ONE_DAY, - )) - } - } -} /// This function parses the timezone from string to Tz. /// If it cannot parse or timezone field is [`None`], it returns [`None`]. 
@@ -1424,6 +1347,14 @@ where ScalarValue::IntervalDayTime(Some(i)) => add_day_time(prior, *i, sign), ScalarValue::IntervalYearMonth(Some(i)) => shift_months(prior, *i, sign), ScalarValue::IntervalMonthDayNano(Some(i)) => add_m_d_nano(prior, *i, sign), + ScalarValue::DurationSecond(Some(v)) => prior.add(Duration::seconds(*v)), + ScalarValue::DurationMillisecond(Some(v)) => { + prior.add(Duration::milliseconds(*v)) + } + ScalarValue::DurationMicrosecond(Some(v)) => { + prior.add(Duration::microseconds(*v)) + } + ScalarValue::DurationNanosecond(Some(v)) => prior.add(Duration::nanoseconds(*v)), other => Err(DataFusionError::Execution(format!( "DateIntervalExpr does not support non-interval type {other:?}" )))?, @@ -1891,6 +1822,16 @@ impl ScalarValue { DataType::Interval(IntervalUnit::MonthDayNano) => { ScalarValue::IntervalMonthDayNano(Some(0)) } + DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(None), + DataType::Duration(TimeUnit::Millisecond) => { + ScalarValue::DurationMillisecond(None) + } + DataType::Duration(TimeUnit::Microsecond) => { + ScalarValue::DurationMicrosecond(None) + } + DataType::Duration(TimeUnit::Nanosecond) => { + ScalarValue::DurationNanosecond(None) + } _ => { return Err(DataFusionError::NotImplemented(format!( "Can't create a zero scalar from data_type \"{datatype:?}\"" @@ -3191,6 +3132,20 @@ impl ScalarValue { IntervalMonthDayNano ) } + + DataType::Duration(TimeUnit::Second) => { + typed_cast!(array, index, DurationSecondArray, DurationSecond) + } + DataType::Duration(TimeUnit::Millisecond) => { + typed_cast!(array, index, DurationMillisecondArray, DurationMillisecond) + } + DataType::Duration(TimeUnit::Microsecond) => { + typed_cast!(array, index, DurationMicrosecondArray, DurationMicrosecond) + } + DataType::Duration(TimeUnit::Nanosecond) => { + typed_cast!(array, index, DurationNanosecondArray, DurationNanosecond) + } + other => { return Err(DataFusionError::NotImplemented(format!( "Can't create a scalar from array of 
type \"{other:?}\"" @@ -3682,6 +3637,18 @@ impl TryFrom<&DataType> for ScalarValue { DataType::Interval(IntervalUnit::MonthDayNano) => { ScalarValue::IntervalMonthDayNano(None) } + + DataType::Duration(TimeUnit::Second) => ScalarValue::DurationSecond(None), + DataType::Duration(TimeUnit::Millisecond) => { + ScalarValue::DurationMillisecond(None) + } + DataType::Duration(TimeUnit::Microsecond) => { + ScalarValue::DurationMicrosecond(None) + } + DataType::Duration(TimeUnit::Nanosecond) => { + ScalarValue::DurationNanosecond(None) + } + DataType::Dictionary(index_type, value_type) => ScalarValue::Dictionary( index_type.clone(), Box::new(value_type.as_ref().try_into()?), @@ -3944,7 +3911,7 @@ mod tests { use std::sync::Arc; use arrow::compute::kernels; - use arrow::compute::{self, concat, is_null}; + use arrow::compute::{concat, is_null}; use arrow::datatypes::ArrowPrimitiveType; use arrow::util::pretty::pretty_format_columns; use arrow_array::ArrowNumericType; @@ -4073,7 +4040,7 @@ mod tests { let right_array = right.to_array(); let arrow_left_array = left_array.as_primitive::(); let arrow_right_array = right_array.as_primitive::(); - let arrow_result = compute::add_checked(arrow_left_array, arrow_right_array); + let arrow_result = kernels::numeric::add(arrow_left_array, arrow_right_array); assert_eq!(scalar_result.is_ok(), arrow_result.is_ok()); } diff --git a/datafusion/core/tests/sql/timestamp.rs b/datafusion/core/tests/sql/timestamp.rs index 60f602b68d63..e6134d2658ae 100644 --- a/datafusion/core/tests/sql/timestamp.rs +++ b/datafusion/core/tests/sql/timestamp.rs @@ -576,8 +576,8 @@ async fn timestamp_add_interval_months() -> Result<()> { let res2 = actual[0][1].as_str(); let format = "%Y-%m-%dT%H:%M:%S%.6fZ"; - let t1_naive = chrono::NaiveDateTime::parse_from_str(res1, format).unwrap(); - let t2_naive = chrono::NaiveDateTime::parse_from_str(res2, format).unwrap(); + let t1_naive = NaiveDateTime::parse_from_str(res1, format).unwrap(); + let t2_naive = 
NaiveDateTime::parse_from_str(res2, format).unwrap(); let year = t1_naive.year() + (t1_naive.month0() as i32 + 17) / 12; let month = (t1_naive.month0() + 17) % 12 + 1; @@ -1035,13 +1035,13 @@ async fn timestamp_sub_with_tz() -> Result<()> { let sql = "SELECT val, ts1 - ts2 AS ts_diff FROM table_a ORDER BY ts2 - ts1"; let actual = execute_to_batches(&ctx, sql).await; let expected = vec![ - "+-----+---------------------------------------------------+", - "| val | ts_diff |", - "+-----+---------------------------------------------------+", - "| 3 | 0 years 0 mons 0 days 10 hours 0 mins 30.000 secs |", - "| 1 | 0 years 0 mons 0 days 10 hours 0 mins 20.000 secs |", - "| 2 | 0 years 0 mons 0 days 10 hours 0 mins 10.000 secs |", - "+-----+---------------------------------------------------+", + "+-----+---------+", + "| val | ts_diff |", + "+-----+---------+", + "| 3 | PT30S |", + "| 1 | PT20S |", + "| 2 | PT10S |", + "+-----+---------+", ]; assert_batches_eq!(expected, &actual); diff --git a/datafusion/core/tests/sql/udf.rs b/datafusion/core/tests/sql/udf.rs index 0ecd5d0fde86..f40de0ae8b98 100644 --- a/datafusion/core/tests/sql/udf.rs +++ b/datafusion/core/tests/sql/udf.rs @@ -16,7 +16,7 @@ // under the License. 
use super::*; -use arrow::compute::add; +use arrow::compute::kernels::numeric::add; use datafusion::{ execution::registry::FunctionRegistry, physical_plan::{expressions::AvgAccumulator, functions::make_scalar_function}, diff --git a/datafusion/core/tests/sqllogictests/test_files/arrow_typeof.slt b/datafusion/core/tests/sqllogictests/test_files/arrow_typeof.slt index 4a3d39bdebcf..0e3f28196a4d 100644 --- a/datafusion/core/tests/sqllogictests/test_files/arrow_typeof.slt +++ b/datafusion/core/tests/sqllogictests/test_files/arrow_typeof.slt @@ -303,9 +303,11 @@ select arrow_cast('30 minutes', 'Interval(MonthDayNano)'); ## Duration -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nThis feature is not implemented: Can't create a scalar from array of type "Duration\(Second\)" +query ? --- select arrow_cast(interval '30 minutes', 'Duration(Second)'); +---- +PT1800S query error DataFusion error: Error during planning: Cannot automatically convert Utf8 to Duration\(Second\) select arrow_cast('30 minutes', 'Duration(Second)'); diff --git a/datafusion/core/tests/sqllogictests/test_files/dates.slt b/datafusion/core/tests/sqllogictests/test_files/dates.slt index c35f16bc0343..03d2fbc4f140 100644 --- a/datafusion/core/tests/sqllogictests/test_files/dates.slt +++ b/datafusion/core/tests/sqllogictests/test_files/dates.slt @@ -91,17 +91,17 @@ where d3_date > now() + '5 days'; # DATE minus DATE # https://github.com/apache/arrow-rs/issues/4383 -query error DataFusion error: Optimizer rule 'simplify_expressions' failed\ncaused by\nArrow error: Cast error: Cannot perform arithmetic operation between array of type Date32 and array of type Date32 +query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Date32 \- Date32 to valid types SELECT DATE '2023-04-09' - DATE '2023-04-02'; # DATE minus Timestamp query ? 
SELECT DATE '2023-04-09' - '2000-01-01T00:00:00'::timestamp; ---- -0 years 0 mons 8499 days 0 hours 0 mins 0.000000000 secs +P8499D # Timestamp minus DATE query ? SELECT '2023-01-01T00:00:00'::timestamp - DATE '2021-01-01'; ---- -0 years 0 mons 730 days 0 hours 0 mins 0.000000000 secs +P730D diff --git a/datafusion/core/tests/sqllogictests/test_files/decimal.slt b/datafusion/core/tests/sqllogictests/test_files/decimal.slt index fd4e80e1afe4..dd3d1548e5a7 100644 --- a/datafusion/core/tests/sqllogictests/test_files/decimal.slt +++ b/datafusion/core/tests/sqllogictests/test_files/decimal.slt @@ -365,7 +365,7 @@ select c1*c5 from decimal_simple; query T select arrow_typeof(c1/cast(0.00001 as decimal(5,5))) from decimal_simple limit 1; ---- -Decimal128(21, 12) +Decimal128(19, 10) query R rowsort @@ -391,27 +391,27 @@ select c1/cast(0.00001 as decimal(5,5)) from decimal_simple; query T select arrow_typeof(c1/c5) from decimal_simple limit 1; ---- -Decimal128(30, 19) +Decimal128(21, 10) query R rowsort select c1/c5 from decimal_simple; ---- 0.5 -0.641025641026 -0.714285714286 -0.735294117647 +0.641025641 +0.7142857142 +0.7352941176 0.8 -0.857142857143 -0.909090909091 -0.909090909091 +0.8571428571 +0.909090909 +0.909090909 0.9375 -0.961538461538 +0.9615384615 1 1 -1.052631578947 -1.515151515152 -2.727272727273 +1.0526315789 +1.5151515151 +2.7272727272 query T diff --git a/datafusion/core/tests/sqllogictests/test_files/interval.slt b/datafusion/core/tests/sqllogictests/test_files/interval.slt index 9dd56c463699..4282a16cdded 100644 --- a/datafusion/core/tests/sqllogictests/test_files/interval.slt +++ b/datafusion/core/tests/sqllogictests/test_files/interval.slt @@ -430,15 +430,11 @@ select '1 month'::interval + '1980-01-01T12:00:00'::timestamp; ---- 1980-02-01T12:00:00 -query D +query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Date32 to valid types select '1 month'::interval - '1980-01-01'::date; ----- 
-1979-12-01 -query P +query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types select '1 month'::interval - '1980-01-01T12:00:00'::timestamp; ----- -1979-12-01T12:00:00 # interval (array) + date / timestamp (array) query D @@ -456,19 +452,11 @@ select i + ts from t; 2000-02-01T00:01:00 # expected error interval (array) - date / timestamp (array) -query D +query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Date32 to valid types select i - d from t; ----- -1979-12-01 -1990-09-30 -1980-01-02 -query P +query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types select i - ts from t; ----- -1999-12-01T00:00:00 -1999-12-31T12:11:10 -2000-01-31T23:59:00 # interval (scalar) + date / timestamp (array) @@ -487,19 +475,11 @@ select '1 month'::interval + ts from t; 2000-03-01T00:00:00 # expected error interval (scalar) - date / timestamp (array) -query D +query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Date32 to valid types select '1 month'::interval - d from t; ----- -1979-12-01 -1990-09-01 -1979-12-02 -query P +query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types select '1 month'::interval - ts from t; ----- -1999-12-01T00:00:00 -1999-12-01T12:11:10 -2000-01-01T00:00:00 # interval + date query D diff --git a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt index 5250ce239939..2d65e8f3f996 100644 --- a/datafusion/core/tests/sqllogictests/test_files/timestamps.slt +++ b/datafusion/core/tests/sqllogictests/test_files/timestamps.slt @@ -1182,10 +1182,10 @@ 
create table bar (val int, i1 interval, i2 interval) as values query I? SELECT val, ts1 - ts2 FROM foo ORDER BY ts2 - ts1; ---- -4 0 years 0 mons -15250 days -13 hours -28 mins -44.999876545 secs -3 0 years 0 mons 15952 days 23 hours 22 mins 12.667123455 secs -2 0 years 0 mons 8406 days 1 hours 1 mins 54.877123455 secs -1 0 years 0 mons 53 days 16 hours 0 mins 20.000000024 secs +3 P15952DT84132.667123455S +2 P8406DT3714.877123455S +1 P53DT57620.000000024S +4 -P15250DT48524.999876545S # Interval - Interval query ? @@ -1233,7 +1233,7 @@ SELECT ts1 + i FROM foo; 2003-07-12T01:31:15.000123463 # Timestamp + Timestamp => error -query error DataFusion error: Arrow error: Cast error: Cannot perform arithmetic operation between array of type Timestamp\(Nanosecond, None\) and array of type Timestamp\(Nanosecond, None\) +query error DataFusion error: Error during planning: Cannot get result type for temporal operation Timestamp\(Nanosecond, None\) \+ Timestamp\(Nanosecond, None\): Invalid argument error: Invalid timestamp arithmetic operation: Timestamp\(Nanosecond, None\) AddWrapping Timestamp\(Nanosecond, None\) SELECT ts1 + ts2 FROM foo; @@ -1241,16 +1241,16 @@ FROM foo; query ? SELECT '2000-01-01T00:00:00'::timestamp - '2000-01-01T00:00:00'::timestamp; ---- -0 years 0 mons 0 days 0 hours 0 mins 0.000000000 secs +PT0S # large timestamp - small timestamp query ? 
SELECT '2000-01-01T00:00:00'::timestamp - '2010-01-01T00:00:00'::timestamp; ---- -0 years 0 mons -3653 days 0 hours 0 mins 0.000000000 secs +-P3653D # Interval - Timestamp => error -statement error DataFusion error: type_coercion\ncaused by\nError during planning: Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) can't be evaluated because there isn't a common type to coerce the types to +statement error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types SELECT i - ts1 from FOO; statement ok diff --git a/datafusion/core/tests/sqllogictests/test_files/type_coercion.slt b/datafusion/core/tests/sqllogictests/test_files/type_coercion.slt index 8b329df0c138..aa1e6826eca5 100644 --- a/datafusion/core/tests/sqllogictests/test_files/type_coercion.slt +++ b/datafusion/core/tests/sqllogictests/test_files/type_coercion.slt @@ -43,13 +43,9 @@ SELECT '2023-05-01 12:30:00'::timestamp - interval '1 month'; 2023-04-01T12:30:00 # interval - date -query D +query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Date32 to valid types select interval '1 month' - '2023-05-01'::date; ----- -2023-04-01 # interval - timestamp -query P +query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types SELECT interval '1 month' - '2023-05-01 12:30:00'::timestamp; ----- -2023-04-01T12:30:00 diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index c510822445a8..36e7ba867840 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -17,6 +17,7 @@ //! 
Coercion rules for matching argument types for binary operators +use arrow::array::{new_empty_array, Array}; use arrow::compute::can_cast_types; use arrow::datatypes::{ DataType, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, @@ -118,8 +119,23 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result Operator::Multiply | Operator::Divide| Operator::Modulo => { - // TODO: this logic would be easier to follow if the functions were inlined - if let Some(ret) = mathematics_temporal_result_type(lhs, rhs) { + let get_result = |lhs, rhs| { + use arrow::compute::kernels::numeric::*; + let l = new_empty_array(lhs); + let r = new_empty_array(rhs); + + let result = match op { + Operator::Plus => add_wrapping(&l, &r), + Operator::Minus => sub_wrapping(&l, &r), + Operator::Multiply => mul_wrapping(&l, &r), + Operator::Divide => div(&l, &r), + Operator::Modulo => rem(&l, &r), + _ => unreachable!(), + }; + result.map(|x| x.data_type().clone()) + }; + + if let Ok(ret) = get_result(lhs, rhs) { // Temporal arithmetic, e.g. Date32 + Interval Ok(Signature{ lhs: lhs.clone(), @@ -129,9 +145,9 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result } else if let Some(coerced) = temporal_coercion(lhs, rhs) { // Temporal arithmetic by first coercing to a common time representation // e.g. Date32 - Timestamp - let ret = mathematics_temporal_result_type(&coerced, &coerced).ok_or_else(|| { + let ret = get_result(&coerced, &coerced).map_err(|e| { DataFusionError::Plan(format!( - "Cannot get result type for temporal operation {coerced} {op} {coerced}" + "Cannot get result type for temporal operation {coerced} {op} {coerced}: {e}" )) })?; Ok(Signature{ @@ -141,9 +157,9 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result }) } else if let Some((lhs, rhs)) = math_decimal_coercion(lhs, rhs) { // Decimal arithmetic, e.g. 
Decimal(10, 2) + Decimal(10, 0) - let ret = decimal_op_mathematics_type(op, &lhs, &rhs).ok_or_else(|| { + let ret = get_result(&lhs, &rhs).map_err(|e| { DataFusionError::Plan(format!( - "Cannot get result type for decimal operation {lhs} {op} {rhs}" + "Cannot get result type for decimal operation {lhs} {op} {rhs}: {e}" )) })?; Ok(Signature{ @@ -163,43 +179,6 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result } } -/// Returns the result type of applying mathematics operations such as -/// `+` to arguments of `lhs_type` and `rhs_type`. -fn mathematics_temporal_result_type( - lhs_type: &DataType, - rhs_type: &DataType, -) -> Option { - use arrow::datatypes::DataType::*; - use arrow::datatypes::IntervalUnit::*; - use arrow::datatypes::TimeUnit::*; - - match (lhs_type, rhs_type) { - // datetime +/- interval - (Interval(_), Timestamp(_, _)) => Some(rhs_type.clone()), - (Timestamp(_, _), Interval(_)) => Some(lhs_type.clone()), - (Interval(_), Date32) => Some(rhs_type.clone()), - (Date32, Interval(_)) => Some(lhs_type.clone()), - (Interval(_), Date64) => Some(rhs_type.clone()), - (Date64, Interval(_)) => Some(lhs_type.clone()), - // interval +/- - (Interval(l), Interval(h)) if l == h => Some(lhs_type.clone()), - (Interval(_), Interval(_)) => Some(Interval(MonthDayNano)), - // timestamp - timestamp - (Timestamp(Second, _), Timestamp(Second, _)) - | (Timestamp(Millisecond, _), Timestamp(Millisecond, _)) => { - Some(Interval(DayTime)) - } - (Timestamp(Microsecond, _), Timestamp(Microsecond, _)) - | (Timestamp(Nanosecond, _), Timestamp(Nanosecond, _)) => { - Some(Interval(MonthDayNano)) - } - // date - date - (Date32, Date32) => Some(Interval(DayTime)), - (Date64, Date64) => Some(Interval(MonthDayNano)), - _ => None, - } -} - /// returns the resulting type of a binary expression evaluating the `op` with the left and right hand types pub fn get_result_type( lhs: &DataType, @@ -517,107 +496,6 @@ fn create_decimal_type(precision: u8, scale: i8) -> DataType { 
) } -/// Returns the coerced type of applying mathematics operations on decimal types. -/// Two sides of the mathematics operation will be coerced to the same type. Note -/// that we don't coerce the decimal operands in analysis phase, but do it in the -/// execution phase because this is not idempotent. -pub fn coercion_decimal_mathematics_type( - mathematics_op: &Operator, - left_decimal_type: &DataType, - right_decimal_type: &DataType, -) -> Option { - // TODO: Move this logic into kernel implementations - use arrow::datatypes::DataType::*; - match (left_decimal_type, right_decimal_type) { - // The promotion rule from spark - // https://github.com/apache/spark/blob/c20af535803a7250fef047c2bf0fe30be242369d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala#L35 - (Decimal128(_, _), Decimal128(_, _)) => match mathematics_op { - Operator::Plus | Operator::Minus => decimal_op_mathematics_type( - mathematics_op, - left_decimal_type, - right_decimal_type, - ), - Operator::Divide | Operator::Modulo => { - get_wider_decimal_type(left_decimal_type, right_decimal_type) - } - _ => None, - }, - _ => None, - } -} - -/// Returns the output type of applying mathematics operations on two decimal types. -/// The rule is from spark. Note that this is different to the coerced type applied -/// to two sides of the arithmetic operation. 
-pub fn decimal_op_mathematics_type( - mathematics_op: &Operator, - left_decimal_type: &DataType, - right_decimal_type: &DataType, -) -> Option { - use arrow::datatypes::DataType::*; - match (left_decimal_type, right_decimal_type) { - // The coercion rule from spark - // https://github.com/apache/spark/blob/c20af535803a7250fef047c2bf0fe30be242369d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala#L35 - (Decimal128(p1, s1), Decimal128(p2, s2)) => { - match mathematics_op { - Operator::Plus | Operator::Minus => { - // max(s1, s2) - let result_scale = *s1.max(s2); - // max(s1, s2) + max(p1-s1, p2-s2) + 1 - let result_precision = - result_scale + (*p1 as i8 - *s1).max(*p2 as i8 - *s2) + 1; - Some(create_decimal_type(result_precision as u8, result_scale)) - } - Operator::Multiply => { - // s1 + s2 - let result_scale = *s1 + *s2; - // p1 + p2 + 1 - let result_precision = *p1 + *p2 + 1; - Some(create_decimal_type(result_precision, result_scale)) - } - Operator::Divide => { - // max(6, s1 + p2 + 1) - let result_scale = 6.max(*s1 + *p2 as i8 + 1); - // p1 - s1 + s2 + max(6, s1 + p2 + 1) - let result_precision = result_scale + *p1 as i8 - *s1 + *s2; - Some(create_decimal_type(result_precision as u8, result_scale)) - } - Operator::Modulo => { - // max(s1, s2) - let result_scale = *s1.max(s2); - // min(p1-s1, p2-s2) + max(s1, s2) - let result_precision = - result_scale + (*p1 as i8 - *s1).min(*p2 as i8 - *s2); - Some(create_decimal_type(result_precision as u8, result_scale)) - } - _ => None, - } - } - (Dictionary(_, lhs_value_type), Dictionary(_, rhs_value_type)) => { - decimal_op_mathematics_type( - mathematics_op, - lhs_value_type.as_ref(), - rhs_value_type.as_ref(), - ) - } - (Dictionary(key_type, value_type), _) => { - let value_type = decimal_op_mathematics_type( - mathematics_op, - value_type.as_ref(), - right_decimal_type, - ); - value_type - .map(|value_type| Dictionary(key_type.clone(), Box::new(value_type))) - } - (_, 
Dictionary(_, value_type)) => decimal_op_mathematics_type( - mathematics_op, - left_decimal_type, - value_type.as_ref(), - ), - _ => None, - } -} - /// Determine if at least of one of lhs and rhs is numeric, and the other must be NULL or numeric fn both_numeric_or_null_and_numeric(lhs_type: &DataType, rhs_type: &DataType) -> bool { use arrow::datatypes::DataType::*; @@ -817,8 +695,8 @@ fn null_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option { mod tests { use arrow::datatypes::DataType; - use datafusion_common::DataFusionError; use datafusion_common::Result; + use datafusion_common::{assert_contains, DataFusionError}; use crate::Operator; @@ -916,53 +794,6 @@ mod tests { coerce_numeric_type_to_decimal(&DataType::Float64).unwrap(), DataType::Decimal128(30, 15) ); - - let op = Operator::Plus; - let left_decimal_type = DataType::Decimal128(10, 3); - let right_decimal_type = DataType::Decimal128(20, 4); - let result = coercion_decimal_mathematics_type( - &op, - &left_decimal_type, - &right_decimal_type, - ); - assert_eq!(DataType::Decimal128(21, 4), result.unwrap()); - let op = Operator::Minus; - let result = coercion_decimal_mathematics_type( - &op, - &left_decimal_type, - &right_decimal_type, - ); - assert_eq!(DataType::Decimal128(21, 4), result.unwrap()); - let op = Operator::Multiply; - let result = coercion_decimal_mathematics_type( - &op, - &left_decimal_type, - &right_decimal_type, - ); - assert_eq!(None, result); - let result = - decimal_op_mathematics_type(&op, &left_decimal_type, &right_decimal_type); - assert_eq!(DataType::Decimal128(31, 7), result.unwrap()); - let op = Operator::Divide; - let result = coercion_decimal_mathematics_type( - &op, - &left_decimal_type, - &right_decimal_type, - ); - assert_eq!(DataType::Decimal128(20, 4), result.unwrap()); - let result = - decimal_op_mathematics_type(&op, &left_decimal_type, &right_decimal_type); - assert_eq!(DataType::Decimal128(35, 24), result.unwrap()); - let op = Operator::Modulo; - let result = 
coercion_decimal_mathematics_type( - &op, - &left_decimal_type, - &right_decimal_type, - ); - assert_eq!(DataType::Decimal128(20, 4), result.unwrap()); - let result = - decimal_op_mathematics_type(&op, &left_decimal_type, &right_decimal_type); - assert_eq!(DataType::Decimal128(11, 4), result.unwrap()); } #[test] @@ -1022,11 +853,14 @@ mod tests { assert_eq!(lhs.to_string(), "Timestamp(Millisecond, None)"); assert_eq!(rhs.to_string(), "Timestamp(Millisecond, None)"); - let (lhs, rhs) = - get_input_types(&DataType::Date32, &Operator::Plus, &DataType::Date64) - .unwrap(); - assert_eq!(lhs.to_string(), "Date64"); - assert_eq!(rhs.to_string(), "Date64"); + let err = get_input_types(&DataType::Date32, &Operator::Plus, &DataType::Date64) + .unwrap_err() + .to_string(); + + assert_contains!( + &err, + "Cannot get result type for temporal operation Date64 + Date64" + ); Ok(()) } @@ -1225,26 +1059,13 @@ mod tests { fn test_math_decimal_coercion_rule( lhs_type: DataType, rhs_type: DataType, - mathematics_op: Operator, expected_lhs_type: DataType, expected_rhs_type: DataType, - expected_coerced_type: Option, - expected_output_type: DataType, ) { // The coerced types for lhs and rhs, if any of them is not decimal let (lhs_type, rhs_type) = math_decimal_coercion(&lhs_type, &rhs_type).unwrap(); assert_eq!(lhs_type, expected_lhs_type); assert_eq!(rhs_type, expected_rhs_type); - - // The coerced type of decimal math expression, applied during expression evaluation - let coerced_type = - coercion_decimal_mathematics_type(&mathematics_op, &lhs_type, &rhs_type); - assert_eq!(coerced_type, expected_coerced_type); - - // The output type of decimal math expression - let output_type = - decimal_op_mathematics_type(&mathematics_op, &lhs_type, &rhs_type).unwrap(); - assert_eq!(output_type, expected_output_type); } #[test] @@ -1252,61 +1073,43 @@ mod tests { test_math_decimal_coercion_rule( DataType::Decimal128(10, 2), DataType::Decimal128(10, 2), - Operator::Plus, DataType::Decimal128(10, 
2), DataType::Decimal128(10, 2), - Some(DataType::Decimal128(11, 2)), - DataType::Decimal128(11, 2), ); test_math_decimal_coercion_rule( DataType::Int32, DataType::Decimal128(10, 2), - Operator::Plus, DataType::Decimal128(10, 0), DataType::Decimal128(10, 2), - Some(DataType::Decimal128(13, 2)), - DataType::Decimal128(13, 2), ); test_math_decimal_coercion_rule( DataType::Int32, DataType::Decimal128(10, 2), - Operator::Minus, DataType::Decimal128(10, 0), DataType::Decimal128(10, 2), - Some(DataType::Decimal128(13, 2)), - DataType::Decimal128(13, 2), ); test_math_decimal_coercion_rule( DataType::Int32, DataType::Decimal128(10, 2), - Operator::Multiply, DataType::Decimal128(10, 0), DataType::Decimal128(10, 2), - None, - DataType::Decimal128(21, 2), ); test_math_decimal_coercion_rule( DataType::Int32, DataType::Decimal128(10, 2), - Operator::Divide, DataType::Decimal128(10, 0), DataType::Decimal128(10, 2), - Some(DataType::Decimal128(12, 2)), - DataType::Decimal128(23, 11), ); test_math_decimal_coercion_rule( DataType::Int32, DataType::Decimal128(10, 2), - Operator::Modulo, DataType::Decimal128(10, 0), DataType::Decimal128(10, 2), - Some(DataType::Decimal128(12, 2)), - DataType::Decimal128(10, 2), ); Ok(()) diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 71e26ee45b44..51e8cd411f4e 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -23,13 +23,7 @@ use std::hash::{Hash, Hasher}; use std::{any::Any, sync::Arc}; use arrow::array::*; -use arrow::compute::kernels::arithmetic::{ - add_dyn, add_scalar_dyn as add_dyn_scalar, divide_dyn_checked, - divide_scalar_dyn as divide_dyn_scalar, modulus_dyn, - modulus_scalar_dyn as modulus_dyn_scalar, multiply_dyn, - multiply_scalar_dyn as multiply_dyn_scalar, subtract_dyn, - subtract_scalar_dyn as subtract_dyn_scalar, -}; +use arrow::compute::cast; use 
arrow::compute::kernels::boolean::{and_kleene, not, or_kleene}; use arrow::compute::kernels::comparison::regexp_is_match_utf8; use arrow::compute::kernels::comparison::regexp_is_match_utf8_scalar; @@ -49,41 +43,32 @@ use arrow::compute::kernels::comparison::{ eq_dyn_utf8_scalar, gt_dyn_utf8_scalar, gt_eq_dyn_utf8_scalar, lt_dyn_utf8_scalar, lt_eq_dyn_utf8_scalar, neq_dyn_utf8_scalar, }; -use arrow::compute::{cast, CastOptions}; use arrow::datatypes::*; use adapter::{eq_dyn, gt_dyn, gt_eq_dyn, lt_dyn, lt_eq_dyn, neq_dyn}; use arrow::compute::kernels::concat_elements::concat_elements_utf8; -use datafusion_expr::type_coercion::{is_decimal, is_timestamp, is_utf8_or_large_utf8}; use kernels::{ bitwise_and_dyn, bitwise_and_dyn_scalar, bitwise_or_dyn, bitwise_or_dyn_scalar, bitwise_shift_left_dyn, bitwise_shift_left_dyn_scalar, bitwise_shift_right_dyn, bitwise_shift_right_dyn_scalar, bitwise_xor_dyn, bitwise_xor_dyn_scalar, }; use kernels_arrow::{ - add_decimal_dyn_scalar, add_dyn_decimal, add_dyn_temporal, divide_decimal_dyn_scalar, - divide_dyn_checked_decimal, is_distinct_from, is_distinct_from_binary, - is_distinct_from_bool, is_distinct_from_decimal, is_distinct_from_f32, - is_distinct_from_f64, is_distinct_from_null, is_distinct_from_utf8, - is_not_distinct_from, is_not_distinct_from_binary, is_not_distinct_from_bool, - is_not_distinct_from_decimal, is_not_distinct_from_f32, is_not_distinct_from_f64, - is_not_distinct_from_null, is_not_distinct_from_utf8, modulus_decimal_dyn_scalar, - modulus_dyn_decimal, multiply_decimal_dyn_scalar, multiply_dyn_decimal, - subtract_decimal_dyn_scalar, subtract_dyn_decimal, subtract_dyn_temporal, + is_distinct_from, is_distinct_from_binary, is_distinct_from_bool, + is_distinct_from_decimal, is_distinct_from_f32, is_distinct_from_f64, + is_distinct_from_null, is_distinct_from_utf8, is_not_distinct_from, + is_not_distinct_from_binary, is_not_distinct_from_bool, is_not_distinct_from_decimal, + is_not_distinct_from_f32, 
is_not_distinct_from_f64, is_not_distinct_from_null, + is_not_distinct_from_utf8, }; use arrow::datatypes::{DataType, Schema, TimeUnit}; use arrow::record_batch::RecordBatch; - -use self::kernels_arrow::{ - add_dyn_temporal_left_scalar, add_dyn_temporal_right_scalar, - subtract_dyn_temporal_left_scalar, subtract_dyn_temporal_right_scalar, -}; +use arrow_array::{Datum, Scalar}; use super::column::Column; use crate::array_expressions::{array_append, array_concat, array_prepend}; -use crate::expressions::cast_column; + use crate::intervals::cp_solver::{propagate_arithmetic, propagate_comparison}; use crate::intervals::{apply_operator, Interval}; use crate::physical_expr::down_cast_any_ref; @@ -92,9 +77,7 @@ use datafusion_common::cast::as_boolean_array; use datafusion_common::ScalarValue; use datafusion_common::{DataFusionError, Result}; -use datafusion_expr::type_coercion::binary::{ - coercion_decimal_mathematics_type, get_result_type, -}; +use datafusion_expr::type_coercion::binary::get_result_type; use datafusion_expr::{ColumnarValue, Operator}; /// Binary expression @@ -371,47 +354,6 @@ macro_rules! compute_op_dyn_scalar { }}; } -/// Invoke a dyn compute kernel on a data array and a scalar value -/// LEFT is Primitive or Dictionary array of numeric values, RIGHT is scalar value -/// OP_TYPE is the return type of scalar function -/// SCALAR_TYPE is the type of the scalar value -/// Different to `compute_op_dyn_scalar`, this calls the `_dyn_scalar` functions that -/// take a `SCALAR_TYPE`. -macro_rules! compute_primitive_op_dyn_scalar { - ($LEFT:expr, $RIGHT:expr, $OP:ident, $OP_TYPE:expr, $SCALAR_TYPE:ident) => {{ - // generate the scalar function name, such as lt_dyn_scalar, from the $OP parameter - // (which could have a value of lt_dyn) and the suffix _scalar - if let Some(value) = $RIGHT { - Ok(Arc::new(paste::expr! 
{[<$OP _dyn_scalar>]::<$SCALAR_TYPE>}( - $LEFT, - value, - )?)) - } else { - // when the $RIGHT is a NULL, generate a NULL array of $OP_TYPE - Ok(Arc::new(new_null_array($OP_TYPE, $LEFT.len()))) - } - }}; -} - -/// Invoke a dyn decimal compute kernel on a data array and a scalar value -/// LEFT is Decimal or Dictionary array of decimal values, RIGHT is scalar value -/// OP_TYPE is the return type of scalar function -macro_rules! compute_primitive_decimal_op_dyn_scalar { - ($LEFT:expr, $RIGHT:expr, $OP:ident, $OP_TYPE:expr, $RET_TYPE:expr) => {{ - // generate the scalar function name, such as add_decimal_dyn_scalar, - // from the $OP parameter (which could have a value of add) and the - // suffix _decimal_dyn_scalar - if let Some(value) = $RIGHT { - Ok(paste::expr! {[<$OP _decimal_dyn_scalar>]}( - $LEFT, value, $RET_TYPE, - )?) - } else { - // when the $RIGHT is a NULL, generate a NULL array of $OP_TYPE - Ok(Arc::new(new_null_array($OP_TYPE, $LEFT.len()))) - } - }}; -} - /// Invoke a compute kernel on array(s) macro_rules! compute_op { // invoke binary operator @@ -449,58 +391,6 @@ macro_rules! binary_string_array_op { }}; } -/// Invoke a compute kernel on a pair of arrays -/// The binary_primitive_array_op macro only evaluates for primitive types -/// like integers and floats. -macro_rules! binary_primitive_array_op_dyn { - ($LEFT:expr, $RIGHT:expr, $OP:ident, $RET_TYPE:expr) => {{ - match $LEFT.data_type() { - DataType::Decimal128(_, _) => { - Ok(paste::expr! {[<$OP _decimal>]}(&$LEFT, &$RIGHT, $RET_TYPE)?) - } - DataType::Dictionary(_, value_type) - if matches!(value_type.as_ref(), &DataType::Decimal128(_, _)) => - { - Ok(paste::expr! {[<$OP _decimal>]}(&$LEFT, &$RIGHT, $RET_TYPE)?) 
- } - _ => Ok(Arc::new( - $OP(&$LEFT, &$RIGHT).map_err(|err| DataFusionError::ArrowError(err))?, - )), - } - }}; -} - -/// Invoke a compute dyn kernel on an array and a scalar -/// The binary_primitive_array_op_dyn_scalar macro only evaluates for primitive -/// types like integers and floats. -macro_rules! binary_primitive_array_op_dyn_scalar { - ($LEFT:expr, $RIGHT:expr, $OP:ident, $RET_TYPE:expr) => {{ - // unwrap underlying (non dictionary) value - let right = unwrap_dict_value($RIGHT); - let op_type = $LEFT.data_type(); - - let result: Result> = match right { - ScalarValue::Decimal128(v, _, _) => compute_primitive_decimal_op_dyn_scalar!($LEFT, v, $OP, op_type, $RET_TYPE), - ScalarValue::Int8(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, Int8Type), - ScalarValue::Int16(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, Int16Type), - ScalarValue::Int32(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, Int32Type), - ScalarValue::Int64(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, Int64Type), - ScalarValue::UInt8(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, UInt8Type), - ScalarValue::UInt16(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, UInt16Type), - ScalarValue::UInt32(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, UInt32Type), - ScalarValue::UInt64(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, UInt64Type), - ScalarValue::Float32(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, Float32Type), - ScalarValue::Float64(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, Float64Type), - other => Err(DataFusionError::Internal(format!( - "Data type {:?} not supported for scalar operation '{}' on dyn array", - other, stringify!($OP))) - ) - }; - - Some(result) - }} -} - /// The binary_array_op macro includes types that extend beyond the primitive, /// such as Utf8 strings. 
#[macro_export] @@ -693,21 +583,29 @@ impl PhysicalExpr for BinaryExpr { let schema = batch.schema(); let input_schema = schema.as_ref(); - // Coerce decimal types to the same scale and precision - let coerced_type = coercion_decimal_mathematics_type( - &self.op, - &left_data_type, - &right_data_type, - ); - let (left_value, right_value) = if let Some(coerced_type) = coerced_type { - let options = CastOptions::default(); - let left_value = cast_column(&left_value, &coerced_type, Some(&options))?; - let right_value = cast_column(&right_value, &coerced_type, Some(&options))?; - (left_value, right_value) - } else { - // No need to coerce if it is not decimal or not math operation - (left_value, right_value) - }; + if self.op.is_numerical_operators() { + return match (&left_value, &right_value) { + (ColumnarValue::Array(left), ColumnarValue::Array(right)) => { + self.evaluate_datum(&*left, &*right) + } + (ColumnarValue::Scalar(left), ColumnarValue::Array(right)) => { + let left = left.to_array(); + self.evaluate_datum(&Scalar::new(left.as_ref()), &*right) + } + (ColumnarValue::Array(left), ColumnarValue::Scalar(right)) => { + let right = right.to_array(); + self.evaluate_datum(&*left, &Scalar::new(right.as_ref())) + } + (ColumnarValue::Scalar(left), ColumnarValue::Scalar(right)) => { + let left = left.to_array(); + let right = right.to_array(); + self.evaluate_datum( + &Scalar::new(left.as_ref()), + &Scalar::new(right.as_ref()), + ) + } + }; + } let result_type = self.data_type(input_schema)?; @@ -715,10 +613,9 @@ impl PhysicalExpr for BinaryExpr { let scalar_result = match (&left_value, &right_value) { (ColumnarValue::Array(array), ColumnarValue::Scalar(scalar)) => { // if left is array and right is literal - use scalar operations - self.evaluate_array_scalar(array, scalar.clone(), &result_type)? 
- .map(|r| { - r.and_then(|a| to_result_type_array(&self.op, a, &result_type)) - }) + self.evaluate_array_scalar(array, scalar.clone())?.map(|r| { + r.and_then(|a| to_result_type_array(&self.op, a, &result_type)) + }) } (ColumnarValue::Scalar(scalar), ColumnarValue::Array(array)) => { // if right is literal and left is array - reverse operator and parameters @@ -736,14 +633,8 @@ impl PhysicalExpr for BinaryExpr { left_value.into_array(batch.num_rows()), right_value.into_array(batch.num_rows()), ); - self.evaluate_with_resolved_args( - left, - &left_data_type, - right, - &right_data_type, - &result_type, - ) - .map(|a| ColumnarValue::Array(a)) + self.evaluate_with_resolved_args(left, &left_data_type, right, &right_data_type) + .map(|a| ColumnarValue::Array(a)) } fn children(&self) -> Vec> { @@ -1088,13 +979,35 @@ fn to_result_type_array( } impl BinaryExpr { + /// Evaluate the expression using [`Datum`] + fn evaluate_datum( + &self, + left: &dyn Datum, + right: &dyn Datum, + ) -> Result { + use arrow::compute::kernels::numeric::*; + let array = match self.op { + Operator::Plus => add_wrapping(left, right)?, + Operator::Minus => sub_wrapping(left, right)?, + Operator::Multiply => mul_wrapping(left, right)?, + Operator::Divide => div(left, right)?, + Operator::Modulo => rem(left, right)?, + _ => unreachable!(), + }; + + if left.get().1 && right.get().1 { + let scalar = ScalarValue::try_from_array(array.as_ref(), 0)?; + return Ok(ColumnarValue::Scalar(scalar)); + } + Ok(ColumnarValue::Array(array)) + } + /// Evaluate the expression of the left input is an array and /// right is literal - use scalar operations fn evaluate_array_scalar( &self, array: &dyn Array, scalar: ScalarValue, - result_type: &DataType, ) -> Result>> { use Operator::*; let bool_type = &DataType::Boolean; @@ -1105,26 +1018,8 @@ impl BinaryExpr { GtEq => binary_array_op_dyn_scalar!(array, scalar, gt_eq, bool_type), Eq => binary_array_op_dyn_scalar!(array, scalar, eq, bool_type), NotEq => 
binary_array_op_dyn_scalar!(array, scalar, neq, bool_type), - Plus => { - binary_primitive_array_op_dyn_scalar!(array, scalar, add, result_type) - } - Minus => binary_primitive_array_op_dyn_scalar!( - array, - scalar, - subtract, - result_type - ), - Multiply => binary_primitive_array_op_dyn_scalar!( - array, - scalar, - multiply, - result_type - ), - Divide => { - binary_primitive_array_op_dyn_scalar!(array, scalar, divide, result_type) - } - Modulo => { - binary_primitive_array_op_dyn_scalar!(array, scalar, modulus, result_type) + Plus | Minus | Multiply | Divide | Modulo => { + unreachable!() } RegexMatch => binary_string_array_flag_op_scalar!( array, @@ -1194,7 +1089,6 @@ impl BinaryExpr { left_data_type: &DataType, right: Arc, right_data_type: &DataType, - result_type: &DataType, ) -> Result { use Operator::*; match &self.op { @@ -1215,24 +1109,7 @@ impl BinaryExpr { } } IsNotDistinctFrom => binary_array_op!(left, right, is_not_distinct_from), - Plus => binary_primitive_array_op_dyn!(left, right, add_dyn, result_type), - Minus => { - binary_primitive_array_op_dyn!(left, right, subtract_dyn, result_type) - } - Multiply => { - binary_primitive_array_op_dyn!(left, right, multiply_dyn, result_type) - } - Divide => { - binary_primitive_array_op_dyn!( - left, - right, - divide_dyn_checked, - result_type - ) - } - Modulo => { - binary_primitive_array_op_dyn!(left, right, modulus_dyn, result_type) - } + Plus | Minus | Multiply | Divide | Modulo => unreachable!(), And => { if left_data_type == &DataType::Boolean { boolean_op!(&left, &right, and_kleene) @@ -1289,56 +1166,11 @@ pub fn binary( lhs: Arc, op: Operator, rhs: Arc, - input_schema: &Schema, + _input_schema: &Schema, ) -> Result> { - let lhs_type = &lhs.data_type(input_schema)?; - let rhs_type = &rhs.data_type(input_schema)?; - if (is_utf8_or_large_utf8(lhs_type) && is_timestamp(rhs_type)) - || (is_timestamp(lhs_type) && is_utf8_or_large_utf8(rhs_type)) - { - return Err(DataFusionError::Plan(format!( - "The type 
of {lhs_type} {op:?} {rhs_type} of binary physical should be same" - ))); - } - if !lhs_type.eq(rhs_type) && (!is_decimal(lhs_type) && !is_decimal(rhs_type)) { - return Err(DataFusionError::Internal(format!( - "The type of {lhs_type} {op:?} {rhs_type} of binary physical should be same" - ))); - } Ok(Arc::new(BinaryExpr::new(lhs, op, rhs))) } -pub fn resolve_temporal_op( - lhs: &ArrayRef, - sign: i32, - rhs: &ArrayRef, -) -> Result { - match sign { - 1 => add_dyn_temporal(lhs, rhs), - -1 => subtract_dyn_temporal(lhs, rhs), - other => Err(DataFusionError::Internal(format!( - "Undefined operation for temporal types {other}" - ))), - } -} - -pub fn resolve_temporal_op_scalar( - arr: &ArrayRef, - sign: i32, - scalar: &ScalarValue, - swap: bool, -) -> Result { - match (sign, swap) { - (1, false) => add_dyn_temporal_right_scalar(arr, scalar), - (1, true) => add_dyn_temporal_left_scalar(scalar, arr), - (-1, false) => subtract_dyn_temporal_right_scalar(arr, scalar), - (-1, true) => subtract_dyn_temporal_left_scalar(scalar, arr), - _ => Err(DataFusionError::Internal( - "Undefined operation for temporal types".to_string(), - )), - } -} - #[cfg(test)] mod tests { use super::*; @@ -2605,14 +2437,14 @@ mod tests { Operator::Divide, create_decimal_array( &[ - Some(99193548387), // 0.99193548387 + Some(9919), // 0.9919 None, None, - Some(100813008130), // 1.0081300813 - Some(100000000000), // 1.0 + Some(10081), // 1.0081 + Some(10000), // 1.0 ], - 21, - 11, + 14, + 4, ), )?; @@ -2691,15 +2523,9 @@ mod tests { let a = DictionaryArray::try_new(keys, decimal_array)?; let decimal_array = Arc::new(create_decimal_array( - &[ - Some(6150000000000), - Some(6100000000000), - None, - Some(6200000000000), - Some(6150000000000), - ], - 21, - 11, + &[Some(615000), Some(610000), None, Some(620000), Some(615000)], + 14, + 4, )); apply_arithmetic_scalar( @@ -4135,14 +3961,9 @@ mod tests { Field::new("b", DataType::Decimal128(10, 2), true), ])); let expect = Arc::new(create_decimal_array( - &[ - 
Some(10000000000000), - None, - Some(10081967213114), - Some(10000000000000), - ], - 23, - 11, + &[Some(1000000), None, Some(1008196), Some(1000000)], + 16, + 4, )) as ArrayRef; apply_decimal_arithmetic_op( &schema, diff --git a/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs b/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs index e7d7f62c86d2..26ff7c369e5d 100644 --- a/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs +++ b/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs @@ -18,41 +18,8 @@ //! This module contains computation kernels that are eventually //! destined for arrow-rs but are in datafusion until they are ported. -use arrow::compute::{ - add_dyn, add_scalar_dyn, divide_dyn_checked, divide_scalar_dyn, modulus_dyn, - modulus_scalar_dyn, multiply_fixed_point, multiply_scalar_dyn, subtract_dyn, - subtract_scalar_dyn, try_unary, -}; -use arrow::datatypes::{Date32Type, Date64Type, Decimal128Type}; use arrow::{array::*, datatypes::ArrowNumericType}; -use arrow_array::ArrowNativeTypeOp; -use arrow_schema::{DataType, IntervalUnit}; -use chrono::{Days, Duration, Months, NaiveDate, NaiveDateTime}; -use datafusion_common::cast::{as_date32_array, as_date64_array, as_decimal128_array}; -use datafusion_common::scalar::{date32_op, date64_op}; -use datafusion_common::{DataFusionError, Result, ScalarValue}; -use std::ops::Add; -use std::sync::Arc; - -use arrow::compute::unary; -use arrow::datatypes::*; - -use arrow_array::temporal_conversions::{MILLISECONDS_IN_DAY, NANOSECONDS_IN_DAY}; -use datafusion_common::delta::shift_months; -use datafusion_common::scalar::{ - calculate_naives, microseconds_add, microseconds_sub, milliseconds_add, - milliseconds_sub, nanoseconds_add, nanoseconds_sub, op_dt, op_dt_mdn, op_mdn, op_ym, - op_ym_dt, op_ym_mdn, parse_timezones, seconds_add, MILLISECOND_MODE, NANOSECOND_MODE, -}; - -use arrow::datatypes::TimeUnit; - -use datafusion_common::cast::{ - as_interval_dt_array, 
as_interval_mdn_array, as_interval_ym_array, - as_timestamp_microsecond_array, as_timestamp_millisecond_array, - as_timestamp_nanosecond_array, as_timestamp_second_array, -}; -use datafusion_common::scalar::*; +use datafusion_common::Result; // Simple (low performance) kernels until optimized kernels are added to arrow // See https://github.com/apache/arrow-rs/issues/960 @@ -303,1748 +270,9 @@ pub(crate) fn is_not_distinct_from_decimal( .collect()) } -pub(crate) fn add_dyn_decimal( - left: &dyn Array, - right: &dyn Array, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - let array = add_dyn(left, right)?; - decimal_array_with_precision_scale(array, precision, scale) -} - -pub(crate) fn add_decimal_dyn_scalar( - left: &dyn Array, - right: i128, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - - let array = add_scalar_dyn::(left, right)?; - decimal_array_with_precision_scale(array, precision, scale) -} - -pub(crate) fn add_dyn_temporal(left: &ArrayRef, right: &ArrayRef) -> Result { - match (left.data_type(), right.data_type()) { - (DataType::Timestamp(..), DataType::Timestamp(..)) => ts_array_op(left, right), - (DataType::Interval(..), DataType::Interval(..)) => { - interval_array_op(left, right, 1) - } - (DataType::Timestamp(..), DataType::Interval(..)) => { - ts_interval_array_op(left, 1, right) - } - (DataType::Interval(..), DataType::Timestamp(..)) => { - ts_interval_array_op(right, 1, left) - } - _ => { - // fall back to kernels in arrow-rs - Ok(add_dyn(left, right)?) - } - } -} - -pub(crate) fn add_dyn_temporal_right_scalar( - left: &ArrayRef, - right: &ScalarValue, -) -> Result { - match (left.data_type(), right.get_datatype()) { - // Date32 + Interval - (DataType::Date32, DataType::Interval(..)) => { - let left = as_date32_array(&left)?; - let ret = Arc::new(try_unary::(left, |days| { - Ok(date32_op(days, right, 1)?) - })?) 
as _; - Ok(ret) - } - // Date64 + Interval - (DataType::Date64, DataType::Interval(..)) => { - let left = as_date64_array(&left)?; - let ret = Arc::new(try_unary::(left, |ms| { - Ok(date64_op(ms, right, 1)?) - })?) as _; - Ok(ret) - } - // Interval + Interval - (DataType::Interval(..), DataType::Interval(..)) => { - interval_op_scalar_interval(left, 1, right) - } - // Timestamp + Interval - (DataType::Timestamp(..), DataType::Interval(..)) => { - ts_op_scalar_interval(left, 1, right) - } - _ => { - // fall back to kernels in arrow-rs - Ok(add_dyn(left, &right.to_array())?) - } - } -} - -pub(crate) fn add_dyn_temporal_left_scalar( - left: &ScalarValue, - right: &ArrayRef, -) -> Result { - match (left.get_datatype(), right.data_type()) { - // Date32 + Interval - (DataType::Date32, DataType::Interval(..)) => { - if let ScalarValue::Date32(Some(left)) = left { - scalar_date32_array_interval_op( - *left, - right, - NaiveDate::checked_add_days, - NaiveDate::checked_add_months, - ) - } else { - Err(DataFusionError::Internal( - "Date32 value is None".to_string(), - )) - } - } - // Date64 + Interval - (DataType::Date64, DataType::Interval(..)) => { - if let ScalarValue::Date64(Some(left)) = left { - scalar_date64_array_interval_op( - *left, - right, - NaiveDate::checked_add_days, - NaiveDate::checked_add_months, - ) - } else { - Err(DataFusionError::Internal( - "Date64 value is None".to_string(), - )) - } - } - // Interval + Interval - (DataType::Interval(..), DataType::Interval(..)) => { - scalar_interval_op_interval(left, 1, right) - } - // Timestamp + Interval - (DataType::Timestamp(..), DataType::Interval(..)) => { - scalar_ts_op_interval(left, 1, right) - } - _ => { - // fall back to kernels in arrow-rs - Ok(add_dyn(&left.to_array(), right)?) 
- } - } -} - -pub(crate) fn subtract_decimal_dyn_scalar( - left: &dyn Array, - right: i128, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - - let array = subtract_scalar_dyn::(left, right)?; - decimal_array_with_precision_scale(array, precision, scale) -} - -pub(crate) fn subtract_dyn_temporal( - left: &ArrayRef, - right: &ArrayRef, -) -> Result { - match (left.data_type(), right.data_type()) { - (DataType::Timestamp(..), DataType::Timestamp(..)) => ts_array_op(left, right), - (DataType::Interval(..), DataType::Interval(..)) => { - interval_array_op(left, right, -1) - } - (DataType::Timestamp(..), DataType::Interval(..)) => { - ts_interval_array_op(left, -1, right) - } - (DataType::Interval(..), DataType::Timestamp(..)) => { - ts_interval_array_op(right, -1, left) - } - _ => { - // fall back to kernels in arrow-rs - Ok(subtract_dyn(left, right)?) - } - } -} - -pub(crate) fn subtract_dyn_temporal_right_scalar( - left: &ArrayRef, - right: &ScalarValue, -) -> Result { - match (left.data_type(), right.get_datatype()) { - // Date32 - Interval - (DataType::Date32, DataType::Interval(..)) => { - let left = as_date32_array(&left)?; - let ret = Arc::new(try_unary::(left, |days| { - Ok(date32_op(days, right, -1)?) - })?) as _; - Ok(ret) - } - // Date64 - Interval - (DataType::Date64, DataType::Interval(..)) => { - let left = as_date64_array(&left)?; - let ret = Arc::new(try_unary::(left, |ms| { - Ok(date64_op(ms, right, -1)?) - })?) as _; - Ok(ret) - } - // Timestamp - Timestamp - (DataType::Timestamp(..), DataType::Timestamp(..)) => { - ts_sub_scalar_ts(left, right) - } - // Interval - Interval - (DataType::Interval(..), DataType::Interval(..)) => { - interval_op_scalar_interval(left, -1, right) - } - // Timestamp - Interval - (DataType::Timestamp(..), DataType::Interval(..)) => { - ts_op_scalar_interval(left, -1, right) - } - _ => { - // fall back to kernels in arrow-rs - Ok(subtract_dyn(left, &right.to_array())?) 
- } - } -} - -pub(crate) fn subtract_dyn_temporal_left_scalar( - left: &ScalarValue, - right: &ArrayRef, -) -> Result { - match (left.get_datatype(), right.data_type()) { - // Date32 - Interval - (DataType::Date32, DataType::Interval(..)) => { - if let ScalarValue::Date32(Some(left)) = left { - scalar_date32_array_interval_op( - *left, - right, - NaiveDate::checked_sub_days, - NaiveDate::checked_sub_months, - ) - } else { - Err(DataFusionError::Internal( - "Date32 value is None".to_string(), - )) - } - } - // Date64 - Interval - (DataType::Date64, DataType::Interval(..)) => { - if let ScalarValue::Date64(Some(left)) = left { - scalar_date64_array_interval_op( - *left, - right, - NaiveDate::checked_sub_days, - NaiveDate::checked_sub_months, - ) - } else { - Err(DataFusionError::Internal( - "Date64 value is None".to_string(), - )) - } - } - // Timestamp - Timestamp - (DataType::Timestamp(..), DataType::Timestamp(..)) => { - scalar_ts_sub_ts(left, right) - } - // Interval - Interval - (DataType::Interval(..), DataType::Interval(..)) => { - scalar_interval_op_interval(left, -1, right) - } - // Timestamp - Interval - (DataType::Timestamp(..), DataType::Interval(..)) => { - scalar_ts_op_interval(left, -1, right) - } - _ => { - // fall back to kernels in arrow-rs - Ok(subtract_dyn(&left.to_array(), right)?) 
- } - } -} - -fn scalar_date32_array_interval_op( - left: i32, - right: &ArrayRef, - day_op: fn(NaiveDate, Days) -> Option, - month_op: fn(NaiveDate, Months) -> Option, -) -> Result { - let epoch = NaiveDate::from_ymd_opt(1970, 1, 1) - .ok_or_else(|| DataFusionError::Execution("Invalid Date entered".to_string()))?; - let prior = epoch.add(Duration::days(left as i64)); - match right.data_type() { - DataType::Interval(IntervalUnit::YearMonth) => { - date32_interval_ym_op(right, &epoch, &prior, month_op) - } - DataType::Interval(IntervalUnit::DayTime) => { - date32_interval_dt_op(right, &epoch, &prior, day_op) - } - DataType::Interval(IntervalUnit::MonthDayNano) => { - date32_interval_mdn_op(right, &epoch, &prior, day_op, month_op) - } - _ => Err(DataFusionError::Internal(format!( - "Expected type is an interval, but {} is found", - right.data_type() - ))), - } -} - -fn scalar_date64_array_interval_op( - left: i64, - right: &ArrayRef, - day_op: fn(NaiveDate, Days) -> Option, - month_op: fn(NaiveDate, Months) -> Option, -) -> Result { - let epoch = NaiveDate::from_ymd_opt(1970, 1, 1) - .ok_or_else(|| DataFusionError::Execution("Invalid Date entered".to_string()))?; - let prior = epoch.add(Duration::milliseconds(left)); - match right.data_type() { - DataType::Interval(IntervalUnit::YearMonth) => { - date64_interval_ym_op(right, &epoch, &prior, month_op) - } - DataType::Interval(IntervalUnit::DayTime) => { - date64_interval_dt_op(right, &epoch, &prior, day_op) - } - DataType::Interval(IntervalUnit::MonthDayNano) => { - date64_interval_mdn_op(right, &epoch, &prior, day_op, month_op) - } - _ => Err(DataFusionError::Internal(format!( - "Expected type is an interval, but {} is found", - right.data_type() - ))), - } -} - -fn get_precision_scale(data_type: &DataType) -> Result<(u8, i8)> { - match data_type { - DataType::Decimal128(precision, scale) => Ok((*precision, *scale)), - DataType::Dictionary(_, value_type) => match value_type.as_ref() { - 
DataType::Decimal128(precision, scale) => Ok((*precision, *scale)), - _ => Err(DataFusionError::Internal( - "Unexpected data type".to_string(), - )), - }, - _ => Err(DataFusionError::Internal( - "Unexpected data type".to_string(), - )), - } -} - -fn decimal_array_with_precision_scale( - array: ArrayRef, - precision: u8, - scale: i8, -) -> Result { - let array = array.as_ref(); - let decimal_array = match array.data_type() { - DataType::Decimal128(_, _) => { - let array = as_decimal128_array(array)?; - Arc::new(array.clone().with_precision_and_scale(precision, scale)?) - as ArrayRef - } - _ => { - return Err(DataFusionError::Internal( - "Unexpected data type".to_string(), - )) - } - }; - Ok(decimal_array) -} - -pub(crate) fn multiply_decimal_dyn_scalar( - left: &dyn Array, - right: i128, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - let array = multiply_scalar_dyn::(left, right)?; - decimal_array_with_precision_scale(array, precision, scale) -} - -pub(crate) fn divide_decimal_dyn_scalar( - left: &dyn Array, - right: i128, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - - let mul = 10_i128.pow(scale as u32); - let array = multiply_scalar_dyn::(left, mul)?; - - let array = divide_scalar_dyn::(&array, right)?; - decimal_array_with_precision_scale(array, precision, scale) -} - -pub(crate) fn subtract_dyn_decimal( - left: &dyn Array, - right: &dyn Array, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - let array = subtract_dyn(left, right)?; - decimal_array_with_precision_scale(array, precision, scale) -} - -/// Remove this once arrow-rs provides `multiply_fixed_point_dyn`. 
-/// -fn multiply_fixed_point_dyn( - left: &dyn Array, - right: &dyn Array, - required_scale: i8, -) -> Result { - match (left.data_type(), right.data_type()) { - (DataType::Decimal128(_, _), DataType::Decimal128(_, _)) => { - let left = left.as_primitive::(); - let right = right.as_primitive::(); - - Ok(multiply_fixed_point(left, right, required_scale) - .map(|a| Arc::new(a) as ArrayRef)?) - } - (_, _) => Err(DataFusionError::Internal(format!( - "Unsupported data type {}, {}", - left.data_type(), - right.data_type() - ))), - } -} - -pub(crate) fn multiply_dyn_decimal( - left: &dyn Array, - right: &dyn Array, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - let array = multiply_fixed_point_dyn(left, right, scale)?; - decimal_array_with_precision_scale(array, precision, scale) -} - -pub(crate) fn divide_dyn_checked_decimal( - left: &dyn Array, - right: &dyn Array, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - - let mul = 10_i128.pow(scale as u32); - let array = multiply_scalar_dyn::(left, mul)?; - - // Restore to original precision and scale (metadata only) - let (org_precision, org_scale) = get_precision_scale(right.data_type())?; - let array = decimal_array_with_precision_scale(array, org_precision, org_scale)?; - let array = divide_dyn_checked(&array, right)?; - decimal_array_with_precision_scale(array, precision, scale) -} - -pub(crate) fn modulus_dyn_decimal( - left: &dyn Array, - right: &dyn Array, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - let array = modulus_dyn(left, right)?; - decimal_array_with_precision_scale(array, precision, scale) -} - -pub(crate) fn modulus_decimal_dyn_scalar( - left: &dyn Array, - right: i128, - result_type: &DataType, -) -> Result { - let (precision, scale) = get_precision_scale(result_type)?; - - let array = modulus_scalar_dyn::(left, right)?; - 
decimal_array_with_precision_scale(array, precision, scale) -} - -macro_rules! sub_timestamp_macro { - ($array:expr, $rhs:expr, $caster:expr, $interval_type:ty, $opt_tz_lhs:expr, $multiplier:expr, - $opt_tz_rhs:expr, $unit_sub:expr, $naive_sub_fn:expr, $counter:expr) => {{ - let prim_array = $caster(&$array)?; - let ret: PrimitiveArray<$interval_type> = try_unary(prim_array, |lhs| { - let (parsed_lhs_tz, parsed_rhs_tz) = - (parse_timezones($opt_tz_lhs)?, parse_timezones($opt_tz_rhs)?); - let (naive_lhs, naive_rhs) = calculate_naives::<$unit_sub>( - lhs.mul_wrapping($multiplier), - parsed_lhs_tz, - $rhs.mul_wrapping($multiplier), - parsed_rhs_tz, - )?; - Ok($naive_sub_fn($counter(&naive_lhs), $counter(&naive_rhs))) - })?; - Arc::new(ret) as _ - }}; -} - -macro_rules! sub_timestamp_left_scalar_macro { - ($array:expr, $lhs:expr, $caster:expr, $interval_type:ty, $opt_tz_lhs:expr, $multiplier:expr, - $opt_tz_rhs:expr, $unit_sub:expr, $naive_sub_fn:expr, $counter:expr) => {{ - let prim_array = $caster(&$array)?; - let ret: PrimitiveArray<$interval_type> = try_unary(prim_array, |rhs| { - let (parsed_lhs_tz, parsed_rhs_tz) = - (parse_timezones($opt_tz_lhs)?, parse_timezones($opt_tz_rhs)?); - let (naive_lhs, naive_rhs) = calculate_naives::<$unit_sub>( - $lhs.mul_wrapping($multiplier), - parsed_lhs_tz, - rhs.mul_wrapping($multiplier), - parsed_rhs_tz, - )?; - Ok($naive_sub_fn($counter(&naive_lhs), $counter(&naive_rhs))) - })?; - Arc::new(ret) as _ - }}; -} - -macro_rules! op_timestamp_interval_macro { - ($array:expr, $as_timestamp:expr, $ts_type:ty, $fn_op:expr, $scalar:expr, $sign:expr, $tz:expr) => {{ - let array = $as_timestamp(&$array)?; - let ret: PrimitiveArray<$ts_type> = - try_unary::<$ts_type, _, $ts_type>(array, |ts_s| { - Ok($fn_op(ts_s, $scalar, $sign)?) - })?; - Arc::new(ret.with_timezone_opt($tz.clone())) as _ - }}; -} - -macro_rules! 
scalar_ts_op_interval_macro { - ($ts:ident, $tz:ident, $interval:ident, $sign:ident, - $caster1:expr, $type1:ty, $type2:ty, $op:expr, $back_caster:expr) => {{ - let interval = $caster1(&$interval)?; - let ret: PrimitiveArray<$type1> = - try_unary::<$type2, _, $type1>(interval, |e| { - let prior = $ts.ok_or_else(|| { - DataFusionError::Internal("Timestamp is out-of-range".to_string()) - })?; - Ok($back_caster(&$op(prior, e, $sign))) - })?; - Arc::new(ret.with_timezone_opt($tz.clone())) as _ - }}; -} - -macro_rules! op_interval_macro { - ($array:expr, $as_interval:expr, $interval_type:ty, $fn_op:expr, $scalar:expr, $sign:expr) => {{ - let array = $as_interval(&$array)?; - let ret: PrimitiveArray<$interval_type> = - unary(array, |lhs| $fn_op(lhs, *$scalar, $sign)); - Arc::new(ret) as _ - }}; -} - -macro_rules! op_interval_cross_macro { - ($array:expr, $as_interval:expr, $commute:expr, $fn_op:expr, $scalar:expr, $sign:expr, $t1:ty, $t2:ty) => {{ - let array = $as_interval(&$array)?; - let ret: PrimitiveArray = if $commute { - unary(array, |lhs| { - $fn_op(*$scalar as $t1, lhs as $t2, $sign, $commute) - }) - } else { - unary(array, |lhs| { - $fn_op(lhs as $t1, *$scalar as $t2, $sign, $commute) - }) - }; - Arc::new(ret) as _ - }}; -} - -macro_rules! ts_sub_op { - ($lhs:ident, $rhs:ident, $lhs_tz:ident, $rhs_tz:ident, $coef:expr, $caster:expr, $op:expr, $ts_unit:expr, $mode:expr, $type_out:ty) => {{ - let prim_array_lhs = $caster(&$lhs)?; - let prim_array_rhs = $caster(&$rhs)?; - let ret: PrimitiveArray<$type_out> = - arrow::compute::try_binary(prim_array_lhs, prim_array_rhs, |ts1, ts2| { - let (parsed_lhs_tz, parsed_rhs_tz) = ( - parse_timezones($lhs_tz.as_deref())?, - parse_timezones($rhs_tz.as_deref())?, - ); - let (naive_lhs, naive_rhs) = calculate_naives::<$mode>( - ts1.mul_wrapping($coef), - parsed_lhs_tz, - ts2.mul_wrapping($coef), - parsed_rhs_tz, - )?; - Ok($op($ts_unit(&naive_lhs), $ts_unit(&naive_rhs))) - })?; - Arc::new(ret) as _ - }}; -} - -macro_rules! 
interval_op { - ($lhs:ident, $rhs:ident, $caster:expr, $op:expr, $sign:ident, $type_in:ty) => {{ - let prim_array_lhs = $caster(&$lhs)?; - let prim_array_rhs = $caster(&$rhs)?; - Arc::new(arrow::compute::binary::<$type_in, $type_in, _, $type_in>( - prim_array_lhs, - prim_array_rhs, - |interval1, interval2| $op(interval1, interval2, $sign), - )?) as _ - }}; -} - -macro_rules! interval_cross_op { - ($lhs:ident, $rhs:ident, $caster1:expr, $caster2:expr, $op:expr, $sign:ident, $commute:ident, $type_in1:ty, $type_in2:ty) => {{ - let prim_array_lhs = $caster1(&$lhs)?; - let prim_array_rhs = $caster2(&$rhs)?; - Arc::new(arrow::compute::binary::< - $type_in1, - $type_in2, - _, - IntervalMonthDayNanoType, - >( - prim_array_lhs, - prim_array_rhs, - |interval1, interval2| $op(interval1, interval2, $sign, $commute), - )?) as _ - }}; -} - -macro_rules! ts_interval_op { - ($lhs:ident, $rhs:ident, $tz:ident, $caster1:expr, $caster2:expr, $op:expr, $sign:ident, $type_in1:ty, $type_in2:ty) => {{ - let prim_array_lhs = $caster1(&$lhs)?; - let prim_array_rhs = $caster2(&$rhs)?; - let ret: PrimitiveArray<$type_in1> = arrow::compute::try_binary( - prim_array_lhs, - prim_array_rhs, - |ts, interval| Ok($op(ts, interval as i128, $sign)?), - )?; - Arc::new(ret.with_timezone_opt($tz.clone())) as _ - }}; -} - -/// This function handles timestamp - timestamp operations where the former is -/// an array and the latter is a scalar, resulting in an array. 
-pub fn ts_sub_scalar_ts(array: &ArrayRef, scalar: &ScalarValue) -> Result { - let ret = match (array.data_type(), scalar) { - ( - DataType::Timestamp(TimeUnit::Second, opt_tz_lhs), - ScalarValue::TimestampSecond(Some(rhs), opt_tz_rhs), - ) => { - sub_timestamp_macro!( - array, - rhs, - as_timestamp_second_array, - IntervalDayTimeType, - opt_tz_lhs.as_deref(), - 1000, - opt_tz_rhs.as_deref(), - MILLISECOND_MODE, - seconds_sub, - NaiveDateTime::timestamp - ) - } - ( - DataType::Timestamp(TimeUnit::Millisecond, opt_tz_lhs), - ScalarValue::TimestampMillisecond(Some(rhs), opt_tz_rhs), - ) => { - sub_timestamp_macro!( - array, - rhs, - as_timestamp_millisecond_array, - IntervalDayTimeType, - opt_tz_lhs.as_deref(), - 1, - opt_tz_rhs.as_deref(), - MILLISECOND_MODE, - milliseconds_sub, - NaiveDateTime::timestamp_millis - ) - } - ( - DataType::Timestamp(TimeUnit::Microsecond, opt_tz_lhs), - ScalarValue::TimestampMicrosecond(Some(rhs), opt_tz_rhs), - ) => { - sub_timestamp_macro!( - array, - rhs, - as_timestamp_microsecond_array, - IntervalMonthDayNanoType, - opt_tz_lhs.as_deref(), - 1000, - opt_tz_rhs.as_deref(), - NANOSECOND_MODE, - microseconds_sub, - NaiveDateTime::timestamp_micros - ) - } - ( - DataType::Timestamp(TimeUnit::Nanosecond, opt_tz_lhs), - ScalarValue::TimestampNanosecond(Some(rhs), opt_tz_rhs), - ) => { - sub_timestamp_macro!( - array, - rhs, - as_timestamp_nanosecond_array, - IntervalMonthDayNanoType, - opt_tz_lhs.as_deref(), - 1, - opt_tz_rhs.as_deref(), - NANOSECOND_MODE, - nanoseconds_sub, - NaiveDateTime::timestamp_nanos - ) - } - (_, _) => { - return Err(DataFusionError::Internal(format!( - "Invalid array - scalar types for Timestamp subtraction: {:?} - {:?}", - array.data_type(), - scalar.get_datatype() - ))); - } - }; - Ok(ret) -} - -/// This function handles timestamp - timestamp operations where the former is -/// a scalar and the latter is an array, resulting in an array. 
-pub fn scalar_ts_sub_ts(scalar: &ScalarValue, array: &ArrayRef) -> Result { - let ret = match (scalar, array.data_type()) { - ( - ScalarValue::TimestampSecond(Some(lhs), opt_tz_lhs), - DataType::Timestamp(TimeUnit::Second, opt_tz_rhs), - ) => { - sub_timestamp_left_scalar_macro!( - array, - lhs, - as_timestamp_second_array, - IntervalDayTimeType, - opt_tz_lhs.as_deref(), - 1000, - opt_tz_rhs.as_deref(), - MILLISECOND_MODE, - seconds_sub, - NaiveDateTime::timestamp - ) - } - ( - ScalarValue::TimestampMillisecond(Some(lhs), opt_tz_lhs), - DataType::Timestamp(TimeUnit::Millisecond, opt_tz_rhs), - ) => { - sub_timestamp_left_scalar_macro!( - array, - lhs, - as_timestamp_millisecond_array, - IntervalDayTimeType, - opt_tz_lhs.as_deref(), - 1, - opt_tz_rhs.as_deref(), - MILLISECOND_MODE, - milliseconds_sub, - NaiveDateTime::timestamp_millis - ) - } - ( - ScalarValue::TimestampMicrosecond(Some(lhs), opt_tz_lhs), - DataType::Timestamp(TimeUnit::Microsecond, opt_tz_rhs), - ) => { - sub_timestamp_left_scalar_macro!( - array, - lhs, - as_timestamp_microsecond_array, - IntervalMonthDayNanoType, - opt_tz_lhs.as_deref(), - 1000, - opt_tz_rhs.as_deref(), - NANOSECOND_MODE, - microseconds_sub, - NaiveDateTime::timestamp_micros - ) - } - ( - ScalarValue::TimestampNanosecond(Some(lhs), opt_tz_lhs), - DataType::Timestamp(TimeUnit::Nanosecond, opt_tz_rhs), - ) => { - sub_timestamp_left_scalar_macro!( - array, - lhs, - as_timestamp_nanosecond_array, - IntervalMonthDayNanoType, - opt_tz_lhs.as_deref(), - 1, - opt_tz_rhs.as_deref(), - NANOSECOND_MODE, - nanoseconds_sub, - NaiveDateTime::timestamp_nanos - ) - } - (_, _) => { - return Err(DataFusionError::Internal(format!( - "Invalid scalar - array types for Timestamp subtraction: {:?} - {:?}", - scalar.get_datatype(), - array.data_type() - ))); - } - }; - Ok(ret) -} - -/// This function handles timestamp +/- interval operations where the former is -/// an array and the latter is a scalar, resulting in an array. 
-pub fn ts_op_scalar_interval( - array: &ArrayRef, - sign: i32, - scalar: &ScalarValue, -) -> Result { - let ret = match array.data_type() { - DataType::Timestamp(TimeUnit::Second, tz) => { - op_timestamp_interval_macro!( - array, - as_timestamp_second_array, - TimestampSecondType, - seconds_add, - scalar, - sign, - tz - ) - } - DataType::Timestamp(TimeUnit::Millisecond, tz) => { - op_timestamp_interval_macro!( - array, - as_timestamp_millisecond_array, - TimestampMillisecondType, - milliseconds_add, - scalar, - sign, - tz - ) - } - DataType::Timestamp(TimeUnit::Microsecond, tz) => { - op_timestamp_interval_macro!( - array, - as_timestamp_microsecond_array, - TimestampMicrosecondType, - microseconds_add, - scalar, - sign, - tz - ) - } - DataType::Timestamp(TimeUnit::Nanosecond, tz) => { - op_timestamp_interval_macro!( - array, - as_timestamp_nanosecond_array, - TimestampNanosecondType, - nanoseconds_add, - scalar, - sign, - tz - ) - } - _ => Err(DataFusionError::Internal(format!( - "Invalid lhs type for Timestamp vs Interval operations: {}", - array.data_type() - )))?, - }; - Ok(ret) -} - -/// This function handles timestamp +/- interval operations where the former is -/// a scalar and the latter is an array, resulting in an array. 
-pub fn scalar_ts_op_interval( - scalar: &ScalarValue, - sign: i32, - array: &ArrayRef, -) -> Result { - use DataType::*; - use IntervalUnit::*; - use ScalarValue::*; - let ret = match (scalar, array.data_type()) { - // Second op YearMonth - (TimestampSecond(Some(ts_sec), tz), Interval(YearMonth)) => { - let naive_date = NaiveDateTime::from_timestamp_opt(*ts_sec, 0); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_ym_array, - TimestampSecondType, - IntervalYearMonthType, - shift_months, - NaiveDateTime::timestamp - ) - } - // Millisecond op YearMonth - (TimestampMillisecond(Some(ts_ms), tz), Interval(YearMonth)) => { - let naive_date = NaiveDateTime::from_timestamp_millis(*ts_ms); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_ym_array, - TimestampSecondType, - IntervalYearMonthType, - shift_months, - NaiveDateTime::timestamp - ) - } - // Microsecond op YearMonth - (TimestampMicrosecond(Some(ts_us), tz), Interval(YearMonth)) => { - let naive_date = NaiveDateTime::from_timestamp_micros(*ts_us); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_ym_array, - TimestampSecondType, - IntervalYearMonthType, - shift_months, - NaiveDateTime::timestamp - ) - } - // Nanosecond op YearMonth - (TimestampNanosecond(Some(ts_ns), tz), Interval(YearMonth)) => { - let naive_date = NaiveDateTime::from_timestamp_opt( - ts_ns.div_euclid(1_000_000_000), - ts_ns.rem_euclid(1_000_000_000).try_into().map_err(|_| { - DataFusionError::Internal("Overflow of divison".to_string()) - })?, - ); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_ym_array, - TimestampSecondType, - IntervalYearMonthType, - shift_months, - NaiveDateTime::timestamp - ) - } - // Second op DayTime - (TimestampSecond(Some(ts_sec), tz), Interval(DayTime)) => { - let naive_date = NaiveDateTime::from_timestamp_opt(*ts_sec, 0); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - 
sign, - as_interval_dt_array, - TimestampSecondType, - IntervalDayTimeType, - add_day_time, - NaiveDateTime::timestamp - ) - } - // Millisecond op DayTime - (TimestampMillisecond(Some(ts_ms), tz), Interval(DayTime)) => { - let naive_date = NaiveDateTime::from_timestamp_millis(*ts_ms); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_dt_array, - TimestampMillisecondType, - IntervalDayTimeType, - add_day_time, - NaiveDateTime::timestamp_millis - ) - } - // Microsecond op DayTime - (TimestampMicrosecond(Some(ts_us), tz), Interval(DayTime)) => { - let naive_date = NaiveDateTime::from_timestamp_micros(*ts_us); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_dt_array, - TimestampMicrosecondType, - IntervalDayTimeType, - add_day_time, - NaiveDateTime::timestamp_micros - ) - } - // Nanosecond op DayTime - (TimestampNanosecond(Some(ts_ns), tz), Interval(DayTime)) => { - let naive_date = NaiveDateTime::from_timestamp_opt( - ts_ns.div_euclid(1_000_000_000), - ts_ns.rem_euclid(1_000_000_000).try_into().map_err(|_| { - DataFusionError::Internal("Overflow of divison".to_string()) - })?, - ); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_dt_array, - TimestampNanosecondType, - IntervalDayTimeType, - add_day_time, - NaiveDateTime::timestamp_nanos - ) - } - // Second op MonthDayNano - (TimestampSecond(Some(ts_sec), tz), Interval(MonthDayNano)) => { - let naive_date = NaiveDateTime::from_timestamp_opt(*ts_sec, 0); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_mdn_array, - TimestampSecondType, - IntervalMonthDayNanoType, - add_m_d_nano, - NaiveDateTime::timestamp - ) - } - // Millisecond op MonthDayNano - (TimestampMillisecond(Some(ts_ms), tz), Interval(MonthDayNano)) => { - let naive_date = NaiveDateTime::from_timestamp_millis(*ts_ms); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_mdn_array, - 
TimestampMillisecondType, - IntervalMonthDayNanoType, - add_m_d_nano, - NaiveDateTime::timestamp_millis - ) - } - // Microsecond op MonthDayNano - (TimestampMicrosecond(Some(ts_us), tz), Interval(MonthDayNano)) => { - let naive_date = NaiveDateTime::from_timestamp_micros(*ts_us); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_mdn_array, - TimestampMicrosecondType, - IntervalMonthDayNanoType, - add_m_d_nano, - NaiveDateTime::timestamp_micros - ) - } - - // Nanosecond op MonthDayNano - (TimestampNanosecond(Some(ts_ns), tz), Interval(MonthDayNano)) => { - let naive_date = NaiveDateTime::from_timestamp_opt( - ts_ns.div_euclid(1_000_000_000), - ts_ns.rem_euclid(1_000_000_000).try_into().map_err(|_| { - DataFusionError::Internal("Overflow of divison".to_string()) - })?, - ); - scalar_ts_op_interval_macro!( - naive_date, - tz, - array, - sign, - as_interval_mdn_array, - TimestampNanosecondType, - IntervalMonthDayNanoType, - add_m_d_nano, - NaiveDateTime::timestamp_nanos - ) - } - _ => Err(DataFusionError::Internal( - "Invalid types for Timestamp vs Interval operations".to_string(), - ))?, - }; - Ok(ret) -} - -/// This function handles interval +/- interval operations where the former is -/// an array and the latter is a scalar, resulting in an interval array. 
-pub fn interval_op_scalar_interval( - array: &ArrayRef, - sign: i32, - scalar: &ScalarValue, -) -> Result { - use DataType::*; - use IntervalUnit::*; - use ScalarValue::*; - let ret = match (array.data_type(), scalar) { - (Interval(YearMonth), IntervalYearMonth(Some(rhs))) => { - op_interval_macro!( - array, - as_interval_ym_array, - IntervalYearMonthType, - op_ym, - rhs, - sign - ) - } - (Interval(YearMonth), IntervalDayTime(Some(rhs))) => { - op_interval_cross_macro!( - array, - as_interval_ym_array, - false, - op_ym_dt, - rhs, - sign, - i32, - i64 - ) - } - (Interval(YearMonth), IntervalMonthDayNano(Some(rhs))) => { - op_interval_cross_macro!( - array, - as_interval_ym_array, - false, - op_ym_mdn, - rhs, - sign, - i32, - i128 - ) - } - (Interval(DayTime), IntervalYearMonth(Some(rhs))) => { - op_interval_cross_macro!( - array, - as_interval_dt_array, - true, - op_ym_dt, - rhs, - sign, - i32, - i64 - ) - } - (Interval(DayTime), IntervalDayTime(Some(rhs))) => { - op_interval_macro!( - array, - as_interval_dt_array, - IntervalDayTimeType, - op_dt, - rhs, - sign - ) - } - (Interval(DayTime), IntervalMonthDayNano(Some(rhs))) => { - op_interval_cross_macro!( - array, - as_interval_dt_array, - false, - op_dt_mdn, - rhs, - sign, - i64, - i128 - ) - } - (Interval(MonthDayNano), IntervalYearMonth(Some(rhs))) => { - op_interval_cross_macro!( - array, - as_interval_mdn_array, - true, - op_ym_mdn, - rhs, - sign, - i32, - i128 - ) - } - (Interval(MonthDayNano), IntervalDayTime(Some(rhs))) => { - op_interval_cross_macro!( - array, - as_interval_mdn_array, - true, - op_dt_mdn, - rhs, - sign, - i64, - i128 - ) - } - (Interval(MonthDayNano), IntervalMonthDayNano(Some(rhs))) => { - op_interval_macro!( - array, - as_interval_mdn_array, - IntervalMonthDayNanoType, - op_mdn, - rhs, - sign - ) - } - _ => Err(DataFusionError::Internal(format!( - "Invalid operands for Interval vs Interval operations: {} - {}", - array.data_type(), - scalar.get_datatype(), - )))?, - }; - Ok(ret) -} - 
-/// This function handles interval +/- interval operations where the former is -/// a scalar and the latter is an array, resulting in an interval array. -pub fn scalar_interval_op_interval( - scalar: &ScalarValue, - sign: i32, - array: &ArrayRef, -) -> Result { - use DataType::*; - use IntervalUnit::*; - use ScalarValue::*; - let ret = match (scalar, array.data_type()) { - // YearMonth op YearMonth - (IntervalYearMonth(Some(lhs)), Interval(YearMonth)) => { - let array = as_interval_ym_array(&array)?; - let ret: PrimitiveArray = - unary(array, |rhs| op_ym(*lhs, rhs, sign)); - Arc::new(ret) as _ - } - // DayTime op YearMonth - (IntervalDayTime(Some(lhs)), Interval(YearMonth)) => { - let array = as_interval_ym_array(&array)?; - let ret: PrimitiveArray = - unary(array, |rhs| op_ym_dt(rhs, *lhs, sign, true)); - Arc::new(ret) as _ - } - // MonthDayNano op YearMonth - (IntervalMonthDayNano(Some(lhs)), Interval(YearMonth)) => { - let array = as_interval_ym_array(&array)?; - let ret: PrimitiveArray = - unary(array, |rhs| op_ym_mdn(rhs, *lhs, sign, true)); - Arc::new(ret) as _ - } - // YearMonth op DayTime - (IntervalYearMonth(Some(lhs)), Interval(DayTime)) => { - let array = as_interval_dt_array(&array)?; - let ret: PrimitiveArray = - unary(array, |rhs| op_ym_dt(*lhs, rhs, sign, false)); - Arc::new(ret) as _ - } - // DayTime op DayTime - (IntervalDayTime(Some(lhs)), Interval(DayTime)) => { - let array = as_interval_dt_array(&array)?; - let ret: PrimitiveArray = - unary(array, |rhs| op_dt(*lhs, rhs, sign)); - Arc::new(ret) as _ - } - // MonthDayNano op DayTime - (IntervalMonthDayNano(Some(lhs)), Interval(DayTime)) => { - let array = as_interval_dt_array(&array)?; - let ret: PrimitiveArray = - unary(array, |rhs| op_dt_mdn(rhs, *lhs, sign, true)); - Arc::new(ret) as _ - } - // YearMonth op MonthDayNano - (IntervalYearMonth(Some(lhs)), Interval(MonthDayNano)) => { - let array = as_interval_mdn_array(&array)?; - let ret: PrimitiveArray = - unary(array, |rhs| op_ym_mdn(*lhs, 
rhs, sign, false)); - Arc::new(ret) as _ - } - // DayTime op MonthDayNano - (IntervalDayTime(Some(lhs)), Interval(MonthDayNano)) => { - let array = as_interval_mdn_array(&array)?; - let ret: PrimitiveArray = - unary(array, |rhs| op_dt_mdn(*lhs, rhs, sign, false)); - Arc::new(ret) as _ - } - // MonthDayNano op MonthDayNano - (IntervalMonthDayNano(Some(lhs)), Interval(MonthDayNano)) => { - let array = as_interval_mdn_array(&array)?; - let ret: PrimitiveArray = - unary(array, |rhs| op_mdn(*lhs, rhs, sign)); - Arc::new(ret) as _ - } - _ => Err(DataFusionError::Internal(format!( - "Invalid operands for Interval vs Interval operations: {} - {}", - scalar.get_datatype(), - array.data_type(), - )))?, - }; - Ok(ret) -} - -/// Performs a timestamp subtraction operation on two arrays and returns the resulting array. -pub fn ts_array_op(array_lhs: &ArrayRef, array_rhs: &ArrayRef) -> Result { - use DataType::*; - use TimeUnit::*; - match (array_lhs.data_type(), array_rhs.data_type()) { - (Timestamp(Second, opt_tz_lhs), Timestamp(Second, opt_tz_rhs)) => Ok(ts_sub_op!( - array_lhs, - array_rhs, - opt_tz_lhs, - opt_tz_rhs, - 1000i64, - as_timestamp_second_array, - seconds_sub, - NaiveDateTime::timestamp, - MILLISECOND_MODE, - IntervalDayTimeType - )), - (Timestamp(Millisecond, opt_tz_lhs), Timestamp(Millisecond, opt_tz_rhs)) => { - Ok(ts_sub_op!( - array_lhs, - array_rhs, - opt_tz_lhs, - opt_tz_rhs, - 1i64, - as_timestamp_millisecond_array, - milliseconds_sub, - NaiveDateTime::timestamp_millis, - MILLISECOND_MODE, - IntervalDayTimeType - )) - } - (Timestamp(Microsecond, opt_tz_lhs), Timestamp(Microsecond, opt_tz_rhs)) => { - Ok(ts_sub_op!( - array_lhs, - array_rhs, - opt_tz_lhs, - opt_tz_rhs, - 1000i64, - as_timestamp_microsecond_array, - microseconds_sub, - NaiveDateTime::timestamp_micros, - NANOSECOND_MODE, - IntervalMonthDayNanoType - )) - } - (Timestamp(Nanosecond, opt_tz_lhs), Timestamp(Nanosecond, opt_tz_rhs)) => { - Ok(ts_sub_op!( - array_lhs, - array_rhs, - opt_tz_lhs, - 
opt_tz_rhs, - 1i64, - as_timestamp_nanosecond_array, - nanoseconds_sub, - NaiveDateTime::timestamp_nanos, - NANOSECOND_MODE, - IntervalMonthDayNanoType - )) - } - (_, _) => Err(DataFusionError::Execution(format!( - "Invalid array types for Timestamp subtraction: {} - {}", - array_lhs.data_type(), - array_rhs.data_type() - ))), - } -} -/// Performs an interval operation on two arrays and returns the resulting array. -/// The operation sign determines whether to perform addition or subtraction. -/// The data type and unit of the two input arrays must match the supported combinations. -pub fn interval_array_op( - array_lhs: &ArrayRef, - array_rhs: &ArrayRef, - sign: i32, -) -> Result { - use DataType::*; - use IntervalUnit::*; - match (array_lhs.data_type(), array_rhs.data_type()) { - (Interval(YearMonth), Interval(YearMonth)) => Ok(interval_op!( - array_lhs, - array_rhs, - as_interval_ym_array, - op_ym, - sign, - IntervalYearMonthType - )), - (Interval(YearMonth), Interval(DayTime)) => Ok(interval_cross_op!( - array_lhs, - array_rhs, - as_interval_ym_array, - as_interval_dt_array, - op_ym_dt, - sign, - false, - IntervalYearMonthType, - IntervalDayTimeType - )), - (Interval(YearMonth), Interval(MonthDayNano)) => Ok(interval_cross_op!( - array_lhs, - array_rhs, - as_interval_ym_array, - as_interval_mdn_array, - op_ym_mdn, - sign, - false, - IntervalYearMonthType, - IntervalMonthDayNanoType - )), - (Interval(DayTime), Interval(YearMonth)) => Ok(interval_cross_op!( - array_rhs, - array_lhs, - as_interval_ym_array, - as_interval_dt_array, - op_ym_dt, - sign, - true, - IntervalYearMonthType, - IntervalDayTimeType - )), - (Interval(DayTime), Interval(DayTime)) => Ok(interval_op!( - array_lhs, - array_rhs, - as_interval_dt_array, - op_dt, - sign, - IntervalDayTimeType - )), - (Interval(DayTime), Interval(MonthDayNano)) => Ok(interval_cross_op!( - array_lhs, - array_rhs, - as_interval_dt_array, - as_interval_mdn_array, - op_dt_mdn, - sign, - false, - IntervalDayTimeType, - 
IntervalMonthDayNanoType - )), - (Interval(MonthDayNano), Interval(YearMonth)) => Ok(interval_cross_op!( - array_rhs, - array_lhs, - as_interval_ym_array, - as_interval_mdn_array, - op_ym_mdn, - sign, - true, - IntervalYearMonthType, - IntervalMonthDayNanoType - )), - (Interval(MonthDayNano), Interval(DayTime)) => Ok(interval_cross_op!( - array_rhs, - array_lhs, - as_interval_dt_array, - as_interval_mdn_array, - op_dt_mdn, - sign, - true, - IntervalDayTimeType, - IntervalMonthDayNanoType - )), - (Interval(MonthDayNano), Interval(MonthDayNano)) => Ok(interval_op!( - array_lhs, - array_rhs, - as_interval_mdn_array, - op_mdn, - sign, - IntervalMonthDayNanoType - )), - (_, _) => Err(DataFusionError::Execution(format!( - "Invalid array types for Interval operation: {} {} {}", - array_lhs.data_type(), - sign, - array_rhs.data_type() - ))), - } -} - -/// Performs a timestamp/interval operation on two arrays and returns the resulting array. -/// The operation sign determines whether to perform addition or subtraction. -/// The data type and unit of the two input arrays must match the supported combinations. 
-pub fn ts_interval_array_op( - array_lhs: &ArrayRef, - sign: i32, - array_rhs: &ArrayRef, -) -> Result { - use DataType::*; - use IntervalUnit::*; - use TimeUnit::*; - match (array_lhs.data_type(), array_rhs.data_type()) { - (Timestamp(Second, tz), Interval(YearMonth)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_second_array, - as_interval_ym_array, - seconds_add_array::, - sign, - TimestampSecondType, - IntervalYearMonthType - )), - (Timestamp(Second, tz), Interval(DayTime)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_second_array, - as_interval_dt_array, - seconds_add_array::, - sign, - TimestampSecondType, - IntervalDayTimeType - )), - (Timestamp(Second, tz), Interval(MonthDayNano)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_second_array, - as_interval_mdn_array, - seconds_add_array::, - sign, - TimestampSecondType, - IntervalMonthDayNanoType - )), - (Timestamp(Millisecond, tz), Interval(YearMonth)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_millisecond_array, - as_interval_ym_array, - milliseconds_add_array::, - sign, - TimestampMillisecondType, - IntervalYearMonthType - )), - (Timestamp(Millisecond, tz), Interval(DayTime)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_millisecond_array, - as_interval_dt_array, - milliseconds_add_array::, - sign, - TimestampMillisecondType, - IntervalDayTimeType - )), - (Timestamp(Millisecond, tz), Interval(MonthDayNano)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_millisecond_array, - as_interval_mdn_array, - milliseconds_add_array::, - sign, - TimestampMillisecondType, - IntervalMonthDayNanoType - )), - (Timestamp(Microsecond, tz), Interval(YearMonth)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_microsecond_array, - as_interval_ym_array, - microseconds_add_array::, - sign, - TimestampMicrosecondType, - IntervalYearMonthType - )), - 
(Timestamp(Microsecond, tz), Interval(DayTime)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_microsecond_array, - as_interval_dt_array, - microseconds_add_array::, - sign, - TimestampMicrosecondType, - IntervalDayTimeType - )), - (Timestamp(Microsecond, tz), Interval(MonthDayNano)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_microsecond_array, - as_interval_mdn_array, - microseconds_add_array::, - sign, - TimestampMicrosecondType, - IntervalMonthDayNanoType - )), - (Timestamp(Nanosecond, tz), Interval(YearMonth)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_nanosecond_array, - as_interval_ym_array, - nanoseconds_add_array::, - sign, - TimestampNanosecondType, - IntervalYearMonthType - )), - (Timestamp(Nanosecond, tz), Interval(DayTime)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_nanosecond_array, - as_interval_dt_array, - nanoseconds_add_array::, - sign, - TimestampNanosecondType, - IntervalDayTimeType - )), - (Timestamp(Nanosecond, tz), Interval(MonthDayNano)) => Ok(ts_interval_op!( - array_lhs, - array_rhs, - tz, - as_timestamp_nanosecond_array, - as_interval_mdn_array, - nanoseconds_add_array::, - sign, - TimestampNanosecondType, - IntervalMonthDayNanoType - )), - (_, _) => Err(DataFusionError::Execution(format!( - "Invalid array types for Timestamp Interval operation: {} {} {}", - array_lhs.data_type(), - sign, - array_rhs.data_type() - ))), - } -} - -#[inline] -pub fn date32_interval_ym_op( - right: &Arc, - epoch: &NaiveDate, - prior: &NaiveDate, - month_op: fn(NaiveDate, Months) -> Option, -) -> Result { - let right: &PrimitiveArray = right.as_primitive(); - let ret = Arc::new(try_unary::( - right, - |ym| { - let months = Months::new(ym.try_into().map_err(|_| { - DataFusionError::Internal( - "Interval values cannot be casted as unsigned integers".to_string(), - ) - })?); - let value = month_op(*prior, months).ok_or_else(|| { - 
DataFusionError::Internal("Resulting date is out of range".to_string()) - })?; - Ok((value - *epoch).num_days() as i32) - }, - )?) as _; - Ok(ret) -} - -#[inline] -pub fn date32_interval_dt_op( - right: &Arc, - epoch: &NaiveDate, - prior: &NaiveDate, - day_op: fn(NaiveDate, Days) -> Option, -) -> Result { - let right: &PrimitiveArray = right.as_primitive(); - let ret = Arc::new(try_unary::( - right, - |dt| { - let (days, millis) = IntervalDayTimeType::to_parts(dt); - let days = Days::new(days.try_into().map_err(|_| { - DataFusionError::Internal( - "Interval values cannot be casted as unsigned integers".to_string(), - ) - })?); - let value = day_op(*prior, days).ok_or_else(|| { - DataFusionError::Internal("Resulting date is out of range".to_string()) - })?; - let milli_days = millis as i64 / MILLISECONDS_IN_DAY; - Ok(((value - *epoch).num_days() - milli_days) as i32) - }, - )?) as _; - Ok(ret) -} - -#[inline] -pub fn date32_interval_mdn_op( - right: &Arc, - epoch: &NaiveDate, - prior: &NaiveDate, - day_op: fn(NaiveDate, Days) -> Option, - month_op: fn(NaiveDate, Months) -> Option, -) -> Result { - let cast_err = |_| { - DataFusionError::Internal( - "Interval values cannot be casted as unsigned integers".to_string(), - ) - }; - let out_of_range = - || DataFusionError::Internal("Resulting date is out of range".to_string()); - let right: &PrimitiveArray = right.as_primitive(); - let ret = Arc::new(try_unary::( - right, - |mdn| { - let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(mdn); - let months_obj = Months::new(months.try_into().map_err(cast_err)?); - let month_diff = month_op(*prior, months_obj).ok_or_else(out_of_range)?; - let days_obj = Days::new(days.try_into().map_err(cast_err)?); - let value = day_op(month_diff, days_obj).ok_or_else(out_of_range)?; - let nano_days = nanos / NANOSECONDS_IN_DAY; - Ok(((value - *epoch).num_days() - nano_days) as i32) - }, - )?) 
as _; - Ok(ret) -} - -#[inline] -pub fn date64_interval_ym_op( - right: &Arc, - epoch: &NaiveDate, - prior: &NaiveDate, - month_op: fn(NaiveDate, Months) -> Option, -) -> Result { - let right: &PrimitiveArray = right.as_primitive(); - let ret = Arc::new(try_unary::( - right, - |ym| { - let months_obj = Months::new(ym.try_into().map_err(|_| { - DataFusionError::Internal( - "Interval values cannot be casted as unsigned integers".to_string(), - ) - })?); - let date = month_op(*prior, months_obj).ok_or_else(|| { - DataFusionError::Internal("Resulting date is out of range".to_string()) - })?; - Ok((date - *epoch).num_milliseconds()) - }, - )?) as _; - Ok(ret) -} - -#[inline] -pub fn date64_interval_dt_op( - right: &Arc, - epoch: &NaiveDate, - prior: &NaiveDate, - day_op: fn(NaiveDate, Days) -> Option, -) -> Result { - let right: &PrimitiveArray = right.as_primitive(); - let ret = Arc::new(try_unary::( - right, - |dt| { - let (days, millis) = IntervalDayTimeType::to_parts(dt); - let days_obj = Days::new(days.try_into().map_err(|_| { - DataFusionError::Internal( - "Interval values cannot be casted as unsigned integers".to_string(), - ) - })?); - let date = day_op(*prior, days_obj).ok_or_else(|| { - DataFusionError::Internal("Resulting date is out of range".to_string()) - })?; - Ok((date - *epoch).num_milliseconds() - millis as i64) - }, - )?) 
as _; - Ok(ret) -} - -#[inline] -pub fn date64_interval_mdn_op( - right: &Arc, - epoch: &NaiveDate, - prior: &NaiveDate, - day_op: fn(NaiveDate, Days) -> Option, - month_op: fn(NaiveDate, Months) -> Option, -) -> Result { - let cast_err = |_| { - DataFusionError::Internal( - "Interval values cannot be casted as unsigned integers".to_string(), - ) - }; - let out_of_range = - || DataFusionError::Internal("Resulting date is out of range".to_string()); - let right: &PrimitiveArray = right.as_primitive(); - let ret = Arc::new(try_unary::( - right, - |mdn| { - let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(mdn); - let months_obj = Months::new(months.try_into().map_err(cast_err)?); - let month_diff = month_op(*prior, months_obj).ok_or_else(out_of_range)?; - let days_obj = Days::new(days.try_into().map_err(cast_err)?); - let value = day_op(month_diff, days_obj).ok_or_else(out_of_range)?; - Ok((value - *epoch).num_milliseconds() - nanos / 1_000_000) - }, - )?) as _; - Ok(ret) -} - #[cfg(test)] mod tests { use super::*; - use datafusion_expr::type_coercion::binary::decimal_op_mathematics_type; - use datafusion_expr::Operator; fn create_decimal_array( array: &[Option], @@ -2111,194 +339,6 @@ mod tests { Ok(()) } - #[test] - fn arithmetic_decimal_op_test() -> Result<()> { - let value_i128: i128 = 123; - let left_decimal_array = create_decimal_array( - &[ - Some(value_i128), - None, - Some(value_i128 - 1), - Some(value_i128 + 1), - ], - 25, - 3, - ); - let right_decimal_array = create_decimal_array( - &[ - Some(value_i128), - Some(value_i128), - Some(value_i128), - Some(value_i128), - ], - 25, - 3, - ); - // add - let result_type = decimal_op_mathematics_type( - &Operator::Plus, - left_decimal_array.data_type(), - right_decimal_array.data_type(), - ) - .unwrap(); - let result = - add_dyn_decimal(&left_decimal_array, &right_decimal_array, &result_type)?; - let result = as_decimal128_array(&result)?; - let expect = - create_decimal_array(&[Some(246), None, 
Some(245), Some(247)], 26, 3); - assert_eq!(&expect, result); - let result = add_decimal_dyn_scalar(&left_decimal_array, 10, &result_type)?; - let result = as_decimal128_array(&result)?; - let expect = - create_decimal_array(&[Some(133), None, Some(132), Some(134)], 26, 3); - assert_eq!(&expect, result); - // subtract - let result_type = decimal_op_mathematics_type( - &Operator::Minus, - left_decimal_array.data_type(), - right_decimal_array.data_type(), - ) - .unwrap(); - let result = subtract_dyn_decimal( - &left_decimal_array, - &right_decimal_array, - &result_type, - )?; - let result = as_decimal128_array(&result)?; - let expect = create_decimal_array(&[Some(0), None, Some(-1), Some(1)], 26, 3); - assert_eq!(&expect, result); - let result = subtract_decimal_dyn_scalar(&left_decimal_array, 10, &result_type)?; - let result = as_decimal128_array(&result)?; - let expect = - create_decimal_array(&[Some(113), None, Some(112), Some(114)], 26, 3); - assert_eq!(&expect, result); - // multiply - let result_type = decimal_op_mathematics_type( - &Operator::Multiply, - left_decimal_array.data_type(), - right_decimal_array.data_type(), - ) - .unwrap(); - let result = multiply_dyn_decimal( - &left_decimal_array, - &right_decimal_array, - &result_type, - )?; - let result = as_decimal128_array(&result)?; - let expect = - create_decimal_array(&[Some(15129), None, Some(15006), Some(15252)], 38, 6); - assert_eq!(&expect, result); - let result = multiply_decimal_dyn_scalar(&left_decimal_array, 10, &result_type)?; - let result = as_decimal128_array(&result)?; - let expect = - create_decimal_array(&[Some(1230), None, Some(1220), Some(1240)], 38, 6); - assert_eq!(&expect, result); - // divide - let result_type = decimal_op_mathematics_type( - &Operator::Divide, - left_decimal_array.data_type(), - right_decimal_array.data_type(), - ) - .unwrap(); - let left_decimal_array = create_decimal_array( - &[ - Some(1234567), - None, - Some(1234567), - Some(1234567), - Some(1234567), - ], - 25, - 
3, - ); - let right_decimal_array = create_decimal_array( - &[Some(10), Some(100), Some(55), Some(-123), None], - 25, - 3, - ); - let result = divide_dyn_checked_decimal( - &left_decimal_array, - &right_decimal_array, - &result_type, - )?; - let result = as_decimal128_array(&result)?; - let expect = create_decimal_array( - &[ - Some(12345670000000000000000000000000000), - None, - Some(2244667272727272727272727272727272), - Some(-1003713008130081300813008130081300), - None, - ], - 38, - 29, - ); - assert_eq!(&expect, result); - let result = divide_decimal_dyn_scalar(&left_decimal_array, 10, &result_type)?; - let result = as_decimal128_array(&result)?; - let expect = create_decimal_array( - &[ - Some(12345670000000000000000000000000000), - None, - Some(12345670000000000000000000000000000), - Some(12345670000000000000000000000000000), - Some(12345670000000000000000000000000000), - ], - 38, - 29, - ); - assert_eq!(&expect, result); - // modulus - let result_type = decimal_op_mathematics_type( - &Operator::Modulo, - left_decimal_array.data_type(), - right_decimal_array.data_type(), - ) - .unwrap(); - let result = - modulus_dyn_decimal(&left_decimal_array, &right_decimal_array, &result_type)?; - let result = as_decimal128_array(&result)?; - let expect = - create_decimal_array(&[Some(7), None, Some(37), Some(16), None], 25, 3); - assert_eq!(&expect, result); - let result = modulus_decimal_dyn_scalar(&left_decimal_array, 10, &result_type)?; - let result = as_decimal128_array(&result)?; - let expect = - create_decimal_array(&[Some(7), None, Some(7), Some(7), Some(7)], 25, 3); - assert_eq!(&expect, result); - - Ok(()) - } - - #[test] - fn arithmetic_decimal_divide_by_zero() { - let left_decimal_array = create_decimal_array(&[Some(101)], 10, 1); - let right_decimal_array = create_decimal_array(&[Some(0)], 1, 1); - - let result_type = decimal_op_mathematics_type( - &Operator::Divide, - left_decimal_array.data_type(), - right_decimal_array.data_type(), - ) - .unwrap(); - let 
err = - divide_decimal_dyn_scalar(&left_decimal_array, 0, &result_type).unwrap_err(); - assert_eq!("Arrow error: Divide by zero error", err.to_string()); - let result_type = decimal_op_mathematics_type( - &Operator::Modulo, - left_decimal_array.data_type(), - right_decimal_array.data_type(), - ) - .unwrap(); - let err = - modulus_dyn_decimal(&left_decimal_array, &right_decimal_array, &result_type) - .unwrap_err(); - assert_eq!("Arrow error: Divide by zero error", err.to_string()); - let err = - modulus_decimal_dyn_scalar(&left_decimal_array, 0, &result_type).unwrap_err(); - assert_eq!("Arrow error: Divide by zero error", err.to_string()); - } - #[test] fn is_distinct_from_non_nulls() -> Result<()> { let left_int_array = @@ -2361,75 +401,4 @@ mod tests { ); Ok(()) } - - #[test] - fn test_decimal_multiply_fixed_point_dyn() { - // [123456789] - let a = Decimal128Array::from(vec![123456789000000000000000000]) - .with_precision_and_scale(38, 18) - .unwrap(); - - // [10] - let b = Decimal128Array::from(vec![10000000000000000000]) - .with_precision_and_scale(38, 18) - .unwrap(); - - // Avoid overflow by reducing the scale. 
- let result = multiply_fixed_point_dyn(&a, &b, 28).unwrap(); - // [1234567890] - let expected = Arc::new( - Decimal128Array::from(vec![12345678900000000000000000000000000000]) - .with_precision_and_scale(38, 28) - .unwrap(), - ) as ArrayRef; - - assert_eq!(&expected, &result); - assert_eq!( - result.as_primitive::().value_as_string(0), - "1234567890.0000000000000000000000000000" - ); - - // [123456789, 10, 10] - let a = Decimal128Array::from(vec![ - 123456789000000000000000000, - 10000000000000000000, - 10000000000000000000, - ]) - .with_precision_and_scale(38, 18) - .unwrap(); - - // [10, 123456789, 12] - let b = Decimal128Array::from(vec![ - 10000000000000000000, - 123456789000000000000000000, - 12000000000000000000, - ]) - .with_precision_and_scale(38, 18) - .unwrap(); - - let result = multiply_fixed_point_dyn(&a, &b, 28).unwrap(); - let expected = Arc::new( - Decimal128Array::from(vec![ - Some(12345678900000000000000000000000000000), - Some(12345678900000000000000000000000000000), - Some(1200000000000000000000000000000), - ]) - .with_precision_and_scale(38, 28) - .unwrap(), - ) as ArrayRef; - - assert_eq!(&expected, &result); - assert_eq!( - result.as_primitive::().value_as_string(0), - "1234567890.0000000000000000000000000000" - ); - assert_eq!( - result.as_primitive::().value_as_string(1), - "1234567890.0000000000000000000000000000" - ); - assert_eq!( - result.as_primitive::().value_as_string(2), - "120.0000000000000000000000000000" - ); - } } diff --git a/datafusion/physical-expr/src/expressions/datetime.rs b/datafusion/physical-expr/src/expressions/datetime.rs deleted file mode 100644 index 4d0ee5cc7dbc..000000000000 --- a/datafusion/physical-expr/src/expressions/datetime.rs +++ /dev/null @@ -1,931 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use crate::intervals::cp_solver::{propagate_arithmetic, propagate_comparison}; -use crate::intervals::{apply_operator, Interval}; -use crate::physical_expr::down_cast_any_ref; -use crate::PhysicalExpr; -use arrow::datatypes::{DataType, Schema}; -use arrow::record_batch::RecordBatch; - -use datafusion_common::{DataFusionError, Result}; -use datafusion_expr::type_coercion::binary::get_result_type; -use datafusion_expr::{ColumnarValue, Operator}; -use std::any::Any; -use std::fmt::{Display, Formatter}; -use std::hash::{Hash, Hasher}; -use std::sync::Arc; - -use super::binary::{resolve_temporal_op, resolve_temporal_op_scalar}; - -/// Perform DATE/TIME/TIMESTAMP +/ INTERVAL math -#[derive(Debug, Hash)] -pub struct DateTimeIntervalExpr { - lhs: Arc, - op: Operator, - rhs: Arc, -} - -impl DateTimeIntervalExpr { - /// Create a new instance of DateIntervalExpr - pub fn new( - lhs: Arc, - op: Operator, - rhs: Arc, - ) -> Self { - Self { lhs, op, rhs } - } - - /// Get the left-hand side expression - pub fn lhs(&self) -> &Arc { - &self.lhs - } - - /// Get the operator - pub fn op(&self) -> &Operator { - &self.op - } - - /// Get the right-hand side expression - pub fn rhs(&self) -> &Arc { - &self.rhs - } -} - -impl Display for DateTimeIntervalExpr { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{} {} {}", self.lhs, self.op, self.rhs) - } -} - -impl PhysicalExpr for 
DateTimeIntervalExpr { - fn as_any(&self) -> &dyn Any { - self - } - - fn data_type(&self, input_schema: &Schema) -> Result { - get_result_type( - &self.lhs.data_type(input_schema)?, - &Operator::Minus, - &self.rhs.data_type(input_schema)?, - ) - } - - fn nullable(&self, input_schema: &Schema) -> Result { - self.lhs.nullable(input_schema) - } - - fn evaluate(&self, batch: &RecordBatch) -> Result { - let lhs_value = self.lhs.evaluate(batch)?; - let rhs_value = self.rhs.evaluate(batch)?; - // Invert sign for subtraction - let sign = match self.op { - Operator::Plus => 1, - Operator::Minus => -1, - _ => { - // this should be unreachable because we check the operators in `try_new` - let msg = "Invalid operator for DateIntervalExpr"; - return Err(DataFusionError::Internal(msg.to_string())); - } - }; - // RHS is first checked. If it is a Scalar, there are 2 options: - // Either LHS is also a Scalar and matching operation is applied, - // or LHS is an Array and unary operations for related types are - // applied in evaluate_array function. If RHS is an Array, then - // LHS must also be, moreover; they must be the same Timestamp type. - match (lhs_value, rhs_value) { - (ColumnarValue::Scalar(operand_lhs), ColumnarValue::Scalar(operand_rhs)) => { - Ok(ColumnarValue::Scalar(if sign > 0 { - operand_lhs.add(&operand_rhs)? - } else { - operand_lhs.sub(&operand_rhs)? - })) - } - // This function evaluates temporal array vs scalar operations, such as timestamp - timestamp, - // interval + interval, timestamp + interval, and interval + timestamp. It takes one array and one scalar as input - // and an integer sign representing the operation (+1 for addition and -1 for subtraction). - (ColumnarValue::Array(arr), ColumnarValue::Scalar(scalar)) => { - Ok(ColumnarValue::Array(resolve_temporal_op_scalar( - &arr, sign, &scalar, false, - )?)) - } - // This function evaluates operations between a scalar value and an array of temporal - // values. 
One example is calculating the duration between a scalar timestamp and an - // array of timestamps (i.e. `now() - some_column`). - (ColumnarValue::Scalar(scalar), ColumnarValue::Array(arr)) => { - Ok(ColumnarValue::Array(resolve_temporal_op_scalar( - &arr, sign, &scalar, true, - )?)) - } - // This function evaluates temporal array operations, such as timestamp - timestamp, interval + interval, - // timestamp + interval, and interval + timestamp. It takes two arrays as input and an integer sign representing - // the operation (+1 for addition and -1 for subtraction). - (ColumnarValue::Array(array_lhs), ColumnarValue::Array(array_rhs)) => Ok( - ColumnarValue::Array(resolve_temporal_op(&array_lhs, sign, &array_rhs)?), - ), - } - } - - fn evaluate_bounds(&self, children: &[&Interval]) -> Result { - // Get children intervals: - let left_interval = children[0]; - let right_interval = children[1]; - // Calculate current node's interval: - apply_operator(&self.op, left_interval, right_interval) - } - - fn propagate_constraints( - &self, - interval: &Interval, - children: &[&Interval], - ) -> Result>> { - // Get children intervals. Graph brings - let left_interval = children[0]; - let right_interval = children[1]; - let (left, right) = if self.op.is_comparison_operator() { - if interval == &Interval::CERTAINLY_FALSE { - // TODO: We will handle strictly false clauses by negating - // the comparison operator (e.g. GT to LE, LT to GE) - // once open/closed intervals are supported. - return Ok(vec![]); - } - // Propagate the comparison operator. - propagate_comparison(&self.op, left_interval, right_interval)? - } else { - // Propagate the arithmetic operator. - propagate_arithmetic(&self.op, interval, left_interval, right_interval)? 
- }; - Ok(vec![left, right]) - } - - fn children(&self) -> Vec> { - vec![self.lhs.clone(), self.rhs.clone()] - } - - fn with_new_children( - self: Arc, - children: Vec>, - ) -> Result> { - Ok(Arc::new(DateTimeIntervalExpr::new( - children[0].clone(), - self.op, - children[1].clone(), - ))) - } - - fn dyn_hash(&self, state: &mut dyn Hasher) { - let mut s = state; - self.hash(&mut s); - } -} - -impl PartialEq for DateTimeIntervalExpr { - fn eq(&self, other: &dyn Any) -> bool { - down_cast_any_ref(other) - .downcast_ref::() - .map(|x| self.lhs.eq(&x.lhs) && self.op == x.op && self.rhs.eq(&x.rhs)) - .unwrap_or(false) - } -} - -/// create a DateIntervalExpr -pub fn date_time_interval_expr( - lhs: Arc, - op: Operator, - rhs: Arc, - input_schema: &Schema, -) -> Result> { - match ( - lhs.data_type(input_schema)?, - op, - rhs.data_type(input_schema)?, - ) { - ( - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _), - Operator::Plus | Operator::Minus, - DataType::Interval(_), - ) - | (DataType::Timestamp(_, _), Operator::Minus, DataType::Timestamp(_, _)) - | (DataType::Interval(_), Operator::Plus, DataType::Timestamp(_, _)) - | ( - DataType::Interval(_), - Operator::Plus | Operator::Minus, - DataType::Interval(_), - ) => Ok(Arc::new(DateTimeIntervalExpr::new(lhs, op, rhs))), - (lhs, _, rhs) => Err(DataFusionError::Execution(format!( - "Invalid operation {op} between '{lhs}' and '{rhs}' for DateIntervalExpr" - ))), - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::create_physical_expr; - use crate::execution_props::ExecutionProps; - use arrow::array::{ArrayRef, Date32Builder}; - use arrow::datatypes::*; - use arrow_array::IntervalMonthDayNanoArray; - use chrono::{Duration, NaiveDate}; - use datafusion_common::{Column, Result, ScalarValue, ToDFSchema}; - use datafusion_expr::Expr; - use std::ops::Add; - - #[test] - fn add_32_day_time() -> Result<()> { - // setup - let dt = Expr::Literal(ScalarValue::Date32(Some(0))); - let op = Operator::Plus; - let 
interval = Expr::Literal(ScalarValue::new_interval_dt(1, 0)); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - ColumnarValue::Scalar(ScalarValue::Date32(Some(d))) => { - let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); - let res = epoch.add(Duration::days(d as i64)); - assert_eq!(format!("{res:?}").as_str(), "1970-01-02"); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - - Ok(()) - } - - #[test] - fn sub_32_year_month() -> Result<()> { - // setup - let dt = Expr::Literal(ScalarValue::Date32(Some(0))); - let op = Operator::Minus; - let interval = Expr::Literal(ScalarValue::IntervalYearMonth(Some(13))); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - ColumnarValue::Scalar(ScalarValue::Date32(Some(d))) => { - let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); - let res = epoch.add(Duration::days(d as i64)); - assert_eq!(format!("{res:?}").as_str(), "1968-12-01"); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - - Ok(()) - } - - #[test] - fn add_64_day_time() -> Result<()> { - // setup - let dt = Expr::Literal(ScalarValue::Date64(Some(0))); - let op = Operator::Plus; - let interval = - Expr::Literal(ScalarValue::new_interval_dt(-15, -24 * 60 * 60 * 1000)); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - ColumnarValue::Scalar(ScalarValue::Date64(Some(d))) => { - let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); - let res = epoch.add(Duration::milliseconds(d)); - assert_eq!(format!("{res:?}").as_str(), "1969-12-16"); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - - Ok(()) - } - - #[test] - fn add_32_year_month() -> Result<()> { - // setup - let dt = Expr::Literal(ScalarValue::Date32(Some(0))); - let op = Operator::Plus; - let interval = 
Expr::Literal(ScalarValue::IntervalYearMonth(Some(1))); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - ColumnarValue::Scalar(ScalarValue::Date32(Some(d))) => { - let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); - let res = epoch.add(Duration::days(d as i64)); - assert_eq!(format!("{res:?}").as_str(), "1970-02-01"); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - - Ok(()) - } - - #[test] - fn add_32_month_day_nano() -> Result<()> { - // setup - let dt = Expr::Literal(ScalarValue::Date32(Some(0))); - let op = Operator::Plus; - let interval = Expr::Literal(ScalarValue::new_interval_mdn(-12, -15, -42)); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - ColumnarValue::Scalar(ScalarValue::Date32(Some(d))) => { - let epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); - let res = epoch.add(Duration::days(d as i64)); - assert_eq!(format!("{res:?}").as_str(), "1968-12-17"); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - - Ok(()) - } - - #[test] - fn add_1_millisecond() -> Result<()> { - // setup - let now_ts_ns = chrono::Utc::now().timestamp_nanos(); - let dt = Expr::Literal(ScalarValue::TimestampNanosecond(Some(now_ts_ns), None)); - let op = Operator::Plus; - let interval = Expr::Literal(ScalarValue::new_interval_dt(0, 1)); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(ts), None)) => { - assert_eq!(ts, now_ts_ns + 1_000_000); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - Ok(()) - } - - #[test] - fn add_2_hours() -> Result<()> { - // setup - let now_ts_s = chrono::Utc::now().timestamp(); - let dt = Expr::Literal(ScalarValue::TimestampSecond(Some(now_ts_s), None)); - let op = Operator::Plus; - let interval = 
Expr::Literal(ScalarValue::new_interval_dt(0, 2 * 3600 * 1_000)); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(ts), None)) => { - assert_eq!(ts, now_ts_s + 2 * 3600); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - Ok(()) - } - - #[test] - fn sub_4_hours() -> Result<()> { - // setup - let now_ts_s = chrono::Utc::now().timestamp(); - let dt = Expr::Literal(ScalarValue::TimestampSecond(Some(now_ts_s), None)); - let op = Operator::Minus; - let interval = Expr::Literal(ScalarValue::new_interval_dt(0, 4 * 3600 * 1_000)); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(ts), None)) => { - assert_eq!(ts, now_ts_s - 4 * 3600); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - Ok(()) - } - - #[test] - fn add_8_days() -> Result<()> { - // setup - let now_ts_ns = chrono::Utc::now().timestamp_nanos(); - let dt = Expr::Literal(ScalarValue::TimestampNanosecond(Some(now_ts_ns), None)); - let op = Operator::Plus; - let interval = Expr::Literal(ScalarValue::new_interval_dt(8, 0)); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(ts), None)) => { - assert_eq!(ts, now_ts_ns + 8 * 86400 * 1_000_000_000); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - Ok(()) - } - - #[test] - fn sub_16_days() -> Result<()> { - // setup - let now_ts_ns = chrono::Utc::now().timestamp_nanos(); - let dt = Expr::Literal(ScalarValue::TimestampNanosecond(Some(now_ts_ns), None)); - let op = Operator::Minus; - let interval = Expr::Literal(ScalarValue::new_interval_dt(16, 0)); - - // exercise - let res = exercise(&dt, op, &interval)?; - - // assert - match res { - 
ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(ts), None)) => { - assert_eq!(ts, now_ts_ns - 16 * 86400 * 1_000_000_000); - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - Ok(()) - } - - #[test] - fn array_add_26_days() -> Result<()> { - let mut builder = Date32Builder::with_capacity(8); - builder.append_slice(&[0, 1, 2, 3, 4, 5, 6, 7]); - let a: ArrayRef = Arc::new(builder.finish()); - - let schema = Schema::new(vec![Field::new("a", DataType::Date32, false)]); - let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![a])?; - let dfs = schema.clone().to_dfschema()?; - let props = ExecutionProps::new(); - - let dt = Expr::Column(Column::from_name("a")); - let interval = Expr::Literal(ScalarValue::new_interval_dt(26, 0)); - let op = Operator::Plus; - - let lhs = create_physical_expr(&dt, &dfs, &schema, &props)?; - let rhs = create_physical_expr(&interval, &dfs, &schema, &props)?; - - let cut = date_time_interval_expr(lhs, op, rhs, &schema)?; - let res = cut.evaluate(&batch)?; - - let mut builder = Date32Builder::with_capacity(8); - builder.append_slice(&[26, 27, 28, 29, 30, 31, 32, 33]); - let expected: ArrayRef = Arc::new(builder.finish()); - - // assert - match res { - ColumnarValue::Array(array) => { - assert_eq!(&array, &expected) - } - _ => Err(DataFusionError::NotImplemented( - "Unexpected result!".to_string(), - ))?, - } - - Ok(()) - } - - #[test] - fn invalid_interval() -> Result<()> { - // setup - let dt = Expr::Literal(ScalarValue::Date32(Some(0))); - let op = Operator::Plus; - let interval = Expr::Literal(ScalarValue::Null); - - // exercise - let res = exercise(&dt, op, &interval); - assert!(res.is_err(), "Can't add a NULL interval"); - - Ok(()) - } - - #[test] - fn invalid_date() -> Result<()> { - // setup - let dt = Expr::Literal(ScalarValue::Null); - let op = Operator::Plus; - let interval = Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(0))); - - // exercise - let res = 
exercise(&dt, op, &interval); - assert!(res.is_err(), "Can't add to NULL date"); - - Ok(()) - } - - #[test] - fn invalid_op() -> Result<()> { - // setup - let dt = Expr::Literal(ScalarValue::Date32(Some(0))); - let op = Operator::Eq; - let interval = Expr::Literal(ScalarValue::IntervalMonthDayNano(Some(0))); - - // exercise - let res = exercise(&dt, op, &interval); - assert!(res.is_err(), "Can't add dates with == operator"); - - Ok(()) - } - - fn exercise(dt: &Expr, op: Operator, interval: &Expr) -> Result { - let mut builder = Date32Builder::with_capacity(1); - builder.append_value(0); - let a: ArrayRef = Arc::new(builder.finish()); - let schema = Schema::new(vec![Field::new("a", DataType::Date32, false)]); - let batch = RecordBatch::try_new(Arc::new(schema.clone()), vec![a])?; - - let dfs = schema.clone().to_dfschema()?; - let props = ExecutionProps::new(); - - let lhs = create_physical_expr(dt, &dfs, &schema, &props)?; - let rhs = create_physical_expr(interval, &dfs, &schema, &props)?; - - let lhs_str = format!("{lhs}"); - let rhs_str = format!("{rhs}"); - - let cut = DateTimeIntervalExpr::new(lhs, op, rhs); - - assert_eq!(lhs_str, format!("{}", cut.lhs())); - assert_eq!(op, cut.op().clone()); - assert_eq!(rhs_str, format!("{}", cut.rhs())); - - let res = cut.evaluate(&batch)?; - Ok(res) - } - - // In this test, ArrayRef of one element arrays is evaluated with some ScalarValues, - // aiming that resolve_temporal_op_scalar function is working properly and shows the same - // behavior with ScalarValue arithmetic. 
- fn experiment( - timestamp_scalar: ScalarValue, - interval_scalar: ScalarValue, - ) -> Result<()> { - let timestamp_array = timestamp_scalar.to_array(); - let interval_array = interval_scalar.to_array(); - - // timestamp + interval - let res1 = - resolve_temporal_op_scalar(&timestamp_array, 1, &interval_scalar, false)?; - let res2 = timestamp_scalar.add(&interval_scalar)?.to_array(); - assert_eq!( - &res1, &res2, - "Timestamp Scalar={timestamp_scalar} + Interval Scalar={interval_scalar}" - ); - let res1 = - resolve_temporal_op_scalar(&timestamp_array, 1, &interval_scalar, true)?; - let res2 = interval_scalar.add(&timestamp_scalar)?.to_array(); - assert_eq!( - &res1, &res2, - "Timestamp Scalar={timestamp_scalar} + Interval Scalar={interval_scalar}" - ); - - // timestamp - interval - let res1 = - resolve_temporal_op_scalar(&timestamp_array, -1, &interval_scalar, false)?; - let res2 = timestamp_scalar.sub(&interval_scalar)?.to_array(); - assert_eq!( - &res1, &res2, - "Timestamp Scalar={timestamp_scalar} - Interval Scalar={interval_scalar}" - ); - - // timestamp - timestamp - let res1 = - resolve_temporal_op_scalar(&timestamp_array, -1, &timestamp_scalar, false)?; - let res2 = timestamp_scalar.sub(&timestamp_scalar)?.to_array(); - assert_eq!( - &res1, &res2, - "Timestamp Scalar={timestamp_scalar} - Timestamp Scalar={timestamp_scalar}" - ); - let res1 = - resolve_temporal_op_scalar(&timestamp_array, -1, &timestamp_scalar, true)?; - let res2 = timestamp_scalar.sub(&timestamp_scalar)?.to_array(); - assert_eq!( - &res1, &res2, - "Timestamp Scalar={timestamp_scalar} - Timestamp Scalar={timestamp_scalar}" - ); - - // interval - interval - let res1 = - resolve_temporal_op_scalar(&interval_array, -1, &interval_scalar, false)?; - let res2 = interval_scalar.sub(&interval_scalar)?.to_array(); - assert_eq!( - &res1, &res2, - "Interval Scalar={interval_scalar} - Interval Scalar={interval_scalar}" - ); - let res1 = - resolve_temporal_op_scalar(&interval_array, -1, &interval_scalar, true)?; - let res2 = 
interval_scalar.sub(&interval_scalar)?.to_array(); - assert_eq!( - &res1, &res2, - "Interval Scalar={interval_scalar} - Interval Scalar={interval_scalar}" - ); - - // interval + interval - let res1 = - resolve_temporal_op_scalar(&interval_array, 1, &interval_scalar, false)?; - let res2 = interval_scalar.add(&interval_scalar)?.to_array(); - assert_eq!( - &res1, &res2, - "Interval Scalar={interval_scalar} + Interval Scalar={interval_scalar}" - ); - let res1 = - resolve_temporal_op_scalar(&interval_array, 1, &interval_scalar, true)?; - let res2 = interval_scalar.add(&interval_scalar)?.to_array(); - assert_eq!( - &res1, &res2, - "Interval Scalar={interval_scalar} + Interval Scalar={interval_scalar}" - ); - - Ok(()) - } - #[test] - fn test_evalute_with_scalar() -> Result<()> { - // Timestamp (sec) & Interval (DayTime) - let timestamp_scalar = ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap() - .timestamp(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_dt(0, 1_000); - - experiment(timestamp_scalar, interval_scalar)?; - - // Timestamp (millisec) & Interval (DayTime) - let timestamp_scalar = ScalarValue::TimestampMillisecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_milli_opt(0, 0, 0, 0) - .unwrap() - .timestamp_millis(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_dt(0, 1_000); - - experiment(timestamp_scalar, interval_scalar)?; - - // Timestamp (nanosec) & Interval (MonthDayNano) - let timestamp_scalar = ScalarValue::TimestampNanosecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_nano_opt(0, 0, 0, 0) - .unwrap() - .timestamp_nanos(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_mdn(0, 0, 1_000); - - experiment(timestamp_scalar, interval_scalar)?; - - // Timestamp (nanosec) & Interval (MonthDayNano), negatively resulting cases - - let timestamp_scalar = 
ScalarValue::TimestampNanosecond( - Some( - NaiveDate::from_ymd_opt(1970, 1, 1) - .unwrap() - .and_hms_nano_opt(0, 0, 0, 000) - .unwrap() - .timestamp_nanos(), - ), - None, - ); - - Arc::new(IntervalMonthDayNanoArray::from(vec![1_000])); // 1 us - let interval_scalar = ScalarValue::new_interval_mdn(0, 0, 1_000); - - experiment(timestamp_scalar, interval_scalar)?; - - // Timestamp (sec) & Interval (YearMonth) - let timestamp_scalar = ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2023, 1, 1) - .unwrap() - .and_hms_opt(0, 0, 0) - .unwrap() - .timestamp(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_ym(0, 1); - - experiment(timestamp_scalar, interval_scalar)?; - - // More test with all matchings of timestamps and intervals - let timestamp_scalar = ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_opt(23, 59, 59) - .unwrap() - .timestamp(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_ym(0, 1); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_opt(23, 59, 59) - .unwrap() - .timestamp(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_dt(10, 100000); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampSecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_opt(23, 59, 59) - .unwrap() - .timestamp(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_mdn(13, 32, 123456); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampMillisecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_milli_opt(23, 59, 59, 909) - .unwrap() - .timestamp_millis(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_ym(0, 1); - - experiment(timestamp_scalar, 
interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampMillisecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_milli_opt(23, 59, 59, 909) - .unwrap() - .timestamp_millis(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_dt(10, 100000); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampMillisecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_milli_opt(23, 59, 59, 909) - .unwrap() - .timestamp_millis(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_mdn(13, 32, 123456); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampMicrosecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_micro_opt(23, 59, 59, 987654) - .unwrap() - .timestamp_micros(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_ym(0, 1); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampMicrosecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_micro_opt(23, 59, 59, 987654) - .unwrap() - .timestamp_micros(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_dt(10, 100000); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampMicrosecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_micro_opt(23, 59, 59, 987654) - .unwrap() - .timestamp_micros(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_mdn(13, 32, 123456); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampNanosecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_nano_opt(23, 59, 59, 999999999) - .unwrap() - .timestamp_nanos(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_ym(0, 1); - - experiment(timestamp_scalar, 
interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampNanosecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_nano_opt(23, 59, 59, 999999999) - .unwrap() - .timestamp_nanos(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_dt(10, 100000); - - experiment(timestamp_scalar, interval_scalar)?; - - let timestamp_scalar = ScalarValue::TimestampNanosecond( - Some( - NaiveDate::from_ymd_opt(2000, 12, 31) - .unwrap() - .and_hms_nano_opt(23, 59, 59, 999999999) - .unwrap() - .timestamp_nanos(), - ), - None, - ); - let interval_scalar = ScalarValue::new_interval_mdn(13, 32, 123456); - - experiment(timestamp_scalar, interval_scalar)?; - - Ok(()) - } -} diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index c660cfadcca1..cb2410e8d52f 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ b/datafusion/physical-expr/src/expressions/mod.rs @@ -22,7 +22,6 @@ mod binary; mod case; mod cast; mod column; -mod datetime; mod get_indexed_field; mod in_list; mod is_not_null; @@ -80,7 +79,6 @@ pub use binary::{binary, BinaryExpr}; pub use case::{case, CaseExpr}; pub use cast::{cast, cast_column, cast_with_options, CastExpr}; pub use column::{col, Column, UnKnownColumn}; -pub use datetime::{date_time_interval_expr, DateTimeIntervalExpr}; pub use get_indexed_field::GetIndexedFieldExpr; pub use in_list::{in_list, InListExpr}; pub use is_not_null::{is_not_null, IsNotNullExpr}; diff --git a/datafusion/physical-expr/src/intervals/test_utils.rs b/datafusion/physical-expr/src/intervals/test_utils.rs index 8e695c255696..075b8240353d 100644 --- a/datafusion/physical-expr/src/intervals/test_utils.rs +++ b/datafusion/physical-expr/src/intervals/test_utils.rs @@ -19,7 +19,7 @@ use std::sync::Arc; -use crate::expressions::{date_time_interval_expr, BinaryExpr, Literal}; +use crate::expressions::{binary, BinaryExpr, Literal}; use crate::PhysicalExpr; use 
arrow_schema::Schema; use datafusion_common::{DataFusionError, ScalarValue}; @@ -78,22 +78,10 @@ pub fn gen_conjunctive_temporal_expr( d: ScalarValue, schema: &Schema, ) -> Result, DataFusionError> { - let left_and_1 = date_time_interval_expr( - left_col.clone(), - op_1, - Arc::new(Literal::new(a)), - schema, - )?; - let left_and_2 = date_time_interval_expr( - right_col.clone(), - op_2, - Arc::new(Literal::new(b)), - schema, - )?; - let right_and_1 = - date_time_interval_expr(left_col, op_3, Arc::new(Literal::new(c)), schema)?; - let right_and_2 = - date_time_interval_expr(right_col, op_4, Arc::new(Literal::new(d)), schema)?; + let left_and_1 = binary(left_col.clone(), op_1, Arc::new(Literal::new(a)), schema)?; + let left_and_2 = binary(right_col.clone(), op_2, Arc::new(Literal::new(b)), schema)?; + let right_and_1 = binary(left_col, op_3, Arc::new(Literal::new(c)), schema)?; + let right_and_2 = binary(right_col, op_4, Arc::new(Literal::new(d)), schema)?; let left_expr = Arc::new(BinaryExpr::new(left_and_1, Operator::Gt, left_and_2)); let right_expr = Arc::new(BinaryExpr::new(right_and_1, Operator::Lt, right_and_2)); Ok(Arc::new(BinaryExpr::new( diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index d3eb6d1db922..96e27dce67c1 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -18,14 +18,12 @@ use crate::var_provider::is_system_variables; use crate::{ execution_props::ExecutionProps, - expressions::{ - self, binary, date_time_interval_expr, like, Column, GetIndexedFieldExpr, Literal, - }, + expressions::{self, binary, like, Column, GetIndexedFieldExpr, Literal}, functions, udf, var_provider::VarType, PhysicalExpr, }; -use arrow::datatypes::{DataType, Schema}; +use arrow::datatypes::Schema; use datafusion_common::{DFSchema, DataFusionError, Result, ScalarValue}; use datafusion_expr::expr::{Alias, Cast, InList, ScalarFunction, ScalarUDF}; use datafusion_expr::{ @@ -183,45 
+181,14 @@ pub fn create_physical_expr( input_schema, execution_props, )?; - // Match the data types and operator to determine the appropriate expression, if - // they are supported temporal types and operations, create DateTimeIntervalExpr, - // else create BinaryExpr. - match ( - lhs.data_type(input_schema)?, - op, - rhs.data_type(input_schema)?, - ) { - ( - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _), - Operator::Plus | Operator::Minus, - DataType::Interval(_), - ) => Ok(date_time_interval_expr(lhs, *op, rhs, input_schema)?), - ( - DataType::Interval(_), - Operator::Plus | Operator::Minus, - DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _), - ) => Ok(date_time_interval_expr(rhs, *op, lhs, input_schema)?), - ( - DataType::Timestamp(_, _), - Operator::Minus, - DataType::Timestamp(_, _), - ) => Ok(date_time_interval_expr(lhs, *op, rhs, input_schema)?), - ( - DataType::Interval(_), - Operator::Plus | Operator::Minus, - DataType::Interval(_), - ) => Ok(date_time_interval_expr(lhs, *op, rhs, input_schema)?), - _ => { - // Note that the logical planner is responsible - // for type coercion on the arguments (e.g. if one - // argument was originally Int32 and one was - // Int64 they will both be coerced to Int64). - // - // There should be no coercion during physical - // planning. - binary(lhs, *op, rhs, input_schema) - } - } + // Note that the logical planner is responsible + // for type coercion on the arguments (e.g. if one + // argument was originally Int32 and one was + // Int64 they will both be coerced to Int64). + // + // There should be no coercion during physical + // planning. 
+ binary(lhs, *op, rhs, input_schema) } Expr::Like(Like { negated, diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 81a8bc6b2342..c5b4ce9037fc 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -1065,6 +1065,9 @@ message PhysicalExtensionNode { // physical expressions message PhysicalExprNode { + // Was date_time_interval_expr + reserved 17; + oneof ExprType { // column references PhysicalColumn column = 1; @@ -1095,8 +1098,6 @@ message PhysicalExprNode { PhysicalScalarUdfNode scalar_udf = 16; - PhysicalDateTimeIntervalExprNode date_time_interval_expr = 17; - PhysicalLikeExprNode like_expr = 18; PhysicalGetIndexedFieldExprNode get_indexed_field_expr = 19; diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 3c7763a15463..e45d02f2a6a9 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -13998,9 +13998,6 @@ impl serde::Serialize for PhysicalExprNode { physical_expr_node::ExprType::ScalarUdf(v) => { struct_ser.serialize_field("scalarUdf", v)?; } - physical_expr_node::ExprType::DateTimeIntervalExpr(v) => { - struct_ser.serialize_field("dateTimeIntervalExpr", v)?; - } physical_expr_node::ExprType::LikeExpr(v) => { struct_ser.serialize_field("likeExpr", v)?; } @@ -14046,8 +14043,6 @@ impl<'de> serde::Deserialize<'de> for PhysicalExprNode { "windowExpr", "scalar_udf", "scalarUdf", - "date_time_interval_expr", - "dateTimeIntervalExpr", "like_expr", "likeExpr", "get_indexed_field_expr", @@ -14072,7 +14067,6 @@ impl<'de> serde::Deserialize<'de> for PhysicalExprNode { TryCast, WindowExpr, ScalarUdf, - DateTimeIntervalExpr, LikeExpr, GetIndexedFieldExpr, } @@ -14112,7 +14106,6 @@ impl<'de> serde::Deserialize<'de> for PhysicalExprNode { "tryCast" | "try_cast" => Ok(GeneratedField::TryCast), "windowExpr" | "window_expr" => Ok(GeneratedField::WindowExpr), "scalarUdf" | 
"scalar_udf" => Ok(GeneratedField::ScalarUdf), - "dateTimeIntervalExpr" | "date_time_interval_expr" => Ok(GeneratedField::DateTimeIntervalExpr), "likeExpr" | "like_expr" => Ok(GeneratedField::LikeExpr), "getIndexedFieldExpr" | "get_indexed_field_expr" => Ok(GeneratedField::GetIndexedFieldExpr), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), @@ -14247,13 +14240,6 @@ impl<'de> serde::Deserialize<'de> for PhysicalExprNode { return Err(serde::de::Error::duplicate_field("scalarUdf")); } expr_type__ = map.next_value::<::std::option::Option<_>>()?.map(physical_expr_node::ExprType::ScalarUdf) -; - } - GeneratedField::DateTimeIntervalExpr => { - if expr_type__.is_some() { - return Err(serde::de::Error::duplicate_field("dateTimeIntervalExpr")); - } - expr_type__ = map.next_value::<::std::option::Option<_>>()?.map(physical_expr_node::ExprType::DateTimeIntervalExpr) ; } GeneratedField::LikeExpr => { diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index aca90c5f57b8..875cdb9c4129 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -1451,7 +1451,7 @@ pub struct PhysicalExtensionNode { pub struct PhysicalExprNode { #[prost( oneof = "physical_expr_node::ExprType", - tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19" + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19" )] pub expr_type: ::core::option::Option, } @@ -1497,10 +1497,6 @@ pub mod physical_expr_node { WindowExpr(::prost::alloc::boxed::Box), #[prost(message, tag = "16")] ScalarUdf(super::PhysicalScalarUdfNode), - #[prost(message, tag = "17")] - DateTimeIntervalExpr( - ::prost::alloc::boxed::Box, - ), #[prost(message, tag = "18")] LikeExpr(::prost::alloc::boxed::Box), #[prost(message, tag = "19")] @@ -1605,10 +1601,10 @@ pub struct PhysicalBinaryExprNode { #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct 
PhysicalDateTimeIntervalExprNode { - #[prost(message, optional, boxed, tag = "1")] - pub l: ::core::option::Option<::prost::alloc::boxed::Box>, - #[prost(message, optional, boxed, tag = "2")] - pub r: ::core::option::Option<::prost::alloc::boxed::Box>, + #[prost(message, optional, tag = "1")] + pub l: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub r: ::core::option::Option, #[prost(string, tag = "3")] pub op: ::prost::alloc::string::String, } diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index 7a52e5f0d09f..d6e2c3ec7288 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -29,9 +29,7 @@ use datafusion::execution::context::ExecutionProps; use datafusion::execution::FunctionRegistry; use datafusion::logical_expr::window_function::WindowFunction; use datafusion::physical_expr::{PhysicalSortExpr, ScalarFunctionExpr}; -use datafusion::physical_plan::expressions::{ - date_time_interval_expr, GetIndexedFieldExpr, -}; +use datafusion::physical_plan::expressions::GetIndexedFieldExpr; use datafusion::physical_plan::expressions::{in_list, LikeExpr}; use datafusion::physical_plan::{ expressions::{ @@ -125,22 +123,6 @@ pub fn parse_physical_expr( input_schema, )?, )), - ExprType::DateTimeIntervalExpr(expr) => date_time_interval_expr( - parse_required_physical_expr( - expr.l.as_deref(), - registry, - "left", - input_schema, - )?, - logical_plan::from_proto::from_proto_binary_op(&expr.op)?, - parse_required_physical_expr( - expr.r.as_deref(), - registry, - "right", - input_schema, - )?, - input_schema, - )?, ExprType::AggregateExpr(_) => { return Err(DataFusionError::NotImplemented( "Cannot convert aggregate expr node to physical expression".to_owned(), diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index 7bbbe135680b..e5a1c020dfcd 100644 --- 
a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -1275,9 +1275,7 @@ mod roundtrip_tests { use datafusion::physical_expr::expressions::in_list; use datafusion::physical_expr::ScalarFunctionExpr; use datafusion::physical_plan::aggregates::PhysicalGroupBy; - use datafusion::physical_plan::expressions::{ - date_time_interval_expr, like, BinaryExpr, GetIndexedFieldExpr, - }; + use datafusion::physical_plan::expressions::{like, BinaryExpr, GetIndexedFieldExpr}; use datafusion::physical_plan::functions::make_scalar_function; use datafusion::physical_plan::projection::ProjectionExec; use datafusion::physical_plan::{functions, udaf}; @@ -1360,7 +1358,7 @@ mod roundtrip_tests { let date_expr = col("some_date", &schema)?; let literal_expr = col("some_interval", &schema)?; let date_time_interval_expr = - date_time_interval_expr(date_expr, Operator::Plus, literal_expr, &schema)?; + binary(date_expr, Operator::Plus, literal_expr, &schema)?; let plan = Arc::new(ProjectionExec::try_new( vec![(date_time_interval_expr, "result".to_string())], input, diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index aaf3569d1634..0fb221b162c0 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -48,7 +48,7 @@ use crate::protobuf::{ ScalarValue, }; use datafusion::logical_expr::BuiltinScalarFunction; -use datafusion::physical_expr::expressions::{DateTimeIntervalExpr, GetIndexedFieldExpr}; +use datafusion::physical_expr::expressions::GetIndexedFieldExpr; use datafusion::physical_expr::{PhysicalSortExpr, ScalarFunctionExpr}; use datafusion::physical_plan::joins::utils::JoinSide; use datafusion::physical_plan::udaf::AggregateFunctionExpr; @@ -363,20 +363,6 @@ impl TryFrom> for protobuf::PhysicalExprNode { )), }) } - } else if let Some(expr) = expr.downcast_ref::() { - let dti_expr = Box::new(protobuf::PhysicalDateTimeIntervalExprNode 
{ - l: Some(Box::new(expr.lhs().to_owned().try_into()?)), - r: Some(Box::new(expr.rhs().to_owned().try_into()?)), - op: format!("{:?}", expr.op()), - }); - - Ok(protobuf::PhysicalExprNode { - expr_type: Some( - protobuf::physical_expr_node::ExprType::DateTimeIntervalExpr( - dti_expr, - ), - ), - }) } else if let Some(expr) = expr.downcast_ref::() { Ok(protobuf::PhysicalExprNode { expr_type: Some(protobuf::physical_expr_node::ExprType::LikeExpr(