Skip to content

Commit

Permalink
[Minor] Extract interval parsing logic, add unit tests (#2984)
Browse files Browse the repository at this point in the history
* Extract interval parsing logic, add unit tests

* Update datafusion/sql/src/interval.rs
  • Loading branch information
alamb authored Jul 31, 2022
1 parent 0fa6a93 commit c7fa789
Show file tree
Hide file tree
Showing 4 changed files with 245 additions and 140 deletions.
25 changes: 24 additions & 1 deletion datafusion/common/src/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ use arrow::{
compute::kernels::cast::{cast, cast_with_options, CastOptions},
datatypes::{
ArrowDictionaryKeyType, ArrowNativeType, DataType, Field, Float32Type,
Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntervalUnit, TimeUnit,
Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, IntervalDayTimeType,
IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType, TimeUnit,
TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
DECIMAL_MAX_PRECISION,
Expand Down Expand Up @@ -618,6 +619,28 @@ impl ScalarValue {
precision, scale
)))
}

/// Returns a [`ScalarValue::IntervalYearMonth`] representing
/// `years` years and `months` months
pub fn new_interval_ym(years: i32, months: i32) -> Self {
let val = IntervalYearMonthType::make_value(years, months);
ScalarValue::IntervalYearMonth(Some(val))
}

/// Returns a [`ScalarValue::IntervalDayTime`] representing
/// `days` days and `millis` milliseconds
pub fn new_interval_dt(days: i32, millis: i32) -> Self {
let val = IntervalDayTimeType::make_value(days, millis);
Self::IntervalDayTime(Some(val))
}

/// Returns a [`ScalarValue::IntervalMonthDayNano`] representing
/// `months` months and `days` days, and `nanos` nanoseconds
pub fn new_interval_mdn(months: i32, days: i32, nanos: i64) -> Self {
let val = IntervalMonthDayNanoType::make_value(months, days, nanos);
ScalarValue::IntervalMonthDayNano(Some(val))
}

/// Getter for the `DataType` of the value
pub fn get_datatype(&self) -> DataType {
match self {
Expand Down
214 changes: 214 additions & 0 deletions datafusion/sql/src/interval.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! Interval parsing logic
use datafusion_common::{DataFusionError, Result, ScalarValue};
use std::str::FromStr;

const SECONDS_PER_HOUR: f32 = 3_600_f32;
const MILLIS_PER_SECOND: f32 = 1_000_f32;

/// Parses a string with an interval like `'0.5 MONTH'` to an
/// appropriately typed [`ScalarValue`]
pub(crate) fn parse_interval(leading_field: &str, value: &str) -> Result<ScalarValue> {
// We are storing parts as integers, it's why we need to align parts fractional
// INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 days
// INTERVAL '0.5 DAY' = 12 hours, INTERVAL '1.5 DAY' = 1 day 12 hours
let align_interval_parts =
|month_part: f32, mut day_part: f32, mut milles_part: f32| -> (i32, i32, f32) {
// Convert fractional month to days, It's not supported by Arrow types, but anyway
day_part += (month_part - (month_part as i32) as f32) * 30_f32;

// Convert fractional days to hours
milles_part += (day_part - ((day_part as i32) as f32))
* 24_f32
* SECONDS_PER_HOUR
* MILLIS_PER_SECOND;

(month_part as i32, day_part as i32, milles_part)
};

let calculate_from_part =
|interval_period_str: &str, interval_type: &str| -> Result<(i32, i32, f32)> {
// @todo It's better to use Decimal in order to protect rounding errors
// Wait https://github.com/apache/arrow/pull/9232
let interval_period = match f32::from_str(interval_period_str) {
Ok(n) => n,
Err(_) => {
return Err(DataFusionError::NotImplemented(format!(
"Unsupported Interval Expression with value {:?}",
value
)));
}
};

if interval_period > (i32::MAX as f32) {
return Err(DataFusionError::NotImplemented(format!(
"Interval field value out of range: {:?}",
value
)));
}

match interval_type.to_lowercase().as_str() {
"year" => Ok(align_interval_parts(interval_period * 12_f32, 0.0, 0.0)),
"month" => Ok(align_interval_parts(interval_period, 0.0, 0.0)),
"day" | "days" => Ok(align_interval_parts(0.0, interval_period, 0.0)),
"hour" | "hours" => {
Ok((0, 0, interval_period * SECONDS_PER_HOUR * MILLIS_PER_SECOND))
}
"minutes" | "minute" => {
Ok((0, 0, interval_period * 60_f32 * MILLIS_PER_SECOND))
}
"seconds" | "second" => Ok((0, 0, interval_period * MILLIS_PER_SECOND)),
"milliseconds" | "millisecond" => Ok((0, 0, interval_period)),
_ => Err(DataFusionError::NotImplemented(format!(
"Invalid input syntax for type interval: {:?}",
value
))),
}
};

let mut result_month: i64 = 0;
let mut result_days: i64 = 0;
let mut result_millis: i64 = 0;

let mut parts = value.split_whitespace();

loop {
let interval_period_str = parts.next();
if interval_period_str.is_none() {
break;
}

let unit = parts.next().unwrap_or(leading_field);

let (diff_month, diff_days, diff_millis) =
calculate_from_part(interval_period_str.unwrap(), unit)?;

result_month += diff_month as i64;

if result_month > (i32::MAX as i64) {
return Err(DataFusionError::NotImplemented(format!(
"Interval field value out of range: {:?}",
value
)));
}

result_days += diff_days as i64;

if result_days > (i32::MAX as i64) {
return Err(DataFusionError::NotImplemented(format!(
"Interval field value out of range: {:?}",
value
)));
}

result_millis += diff_millis as i64;

if result_millis > (i32::MAX as i64) {
return Err(DataFusionError::NotImplemented(format!(
"Interval field value out of range: {:?}",
value
)));
}
}

// Interval is tricky thing
// 1 day is not 24 hours because timezones, 1 year != 365/364! 30 days != 1 month
// The true way to store and calculate intervals is to store it as it defined
// It's why we there are 3 different interval types in Arrow
if result_month != 0 && (result_days != 0 || result_millis != 0) {
let result: i128 = ((result_month as i128) << 96)
| ((result_days as i128) << 64)
// IntervalMonthDayNano uses nanos, but IntervalDayTime uses milles
| ((result_millis * 1_000_000_i64) as i128);

return Ok(ScalarValue::IntervalMonthDayNano(Some(result)));
}

// Month interval
if result_month != 0 {
return Ok(ScalarValue::IntervalYearMonth(Some(result_month as i32)));
}

let result: i64 = (result_days << 32) | result_millis;
Ok(ScalarValue::IntervalDayTime(Some(result)))
}

#[cfg(test)]
mod test {
use crate::assert_contains;

use super::*;

#[test]
fn test_parse_ym() {
assert_eq!(
parse_interval("months", "1 month").unwrap(),
ScalarValue::new_interval_ym(0, 1)
);

assert_eq!(
parse_interval("months", "2 month").unwrap(),
ScalarValue::new_interval_ym(0, 2)
);

assert_eq!(
parse_interval("months", "3 year 1 month").unwrap(),
ScalarValue::new_interval_ym(3, 1)
);

assert_contains!(
parse_interval("months", "1 years 1 month")
.unwrap_err()
.to_string(),
r#"Invalid input syntax for type interval: "1 years 1 month""#
);
}

#[test]
fn test_dt() {
assert_eq!(
parse_interval("months", "5 days").unwrap(),
ScalarValue::new_interval_dt(5, 0)
);

assert_eq!(
parse_interval("months", "7 days 3 hours").unwrap(),
ScalarValue::new_interval_dt(
7,
(3.0 * SECONDS_PER_HOUR * MILLIS_PER_SECOND) as i32
)
);

assert_eq!(
parse_interval("months", "7 days 5 minutes").unwrap(),
ScalarValue::new_interval_dt(7, (5.0 * 60.0 * MILLIS_PER_SECOND) as i32)
);
}

#[test]
// https://github.com/apache/arrow-rs/pull/2235
#[ignore]
fn test_mdn() {
// these should be the same, I think
assert_eq!(
parse_interval("months", "1 year 25 millisecond").unwrap(),
ScalarValue::new_interval_mdn(12, 0, 25 * 1_000_000)
);
}
}
1 change: 1 addition & 0 deletions datafusion/sql/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
//! This module provides a SQL parser that translates SQL queries into an abstract syntax
//! tree (AST), and a SQL query planner that creates a logical plan from the AST.
mod interval;
pub mod parser;
pub mod planner;
mod table_reference;
Expand Down
Loading

0 comments on commit c7fa789

Please sign in to comment.