From c609405135014ff336791ce70376c7cea393fb9f Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Fri, 26 Jan 2024 17:17:36 +0100 Subject: [PATCH 01/15] Delete almost everything to start again --- src/lib.rs | 362 +----------------------------------------- src/parse_datetime.rs | 279 -------------------------------- tests/simple.rs | 148 ----------------- 3 files changed, 4 insertions(+), 785 deletions(-) delete mode 100644 src/parse_datetime.rs diff --git a/src/lib.rs b/src/lib.rs index 614b355..ea6e6d5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,26 +1,19 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// Expose parse_datetime -pub mod parse_datetime; - -use chrono::{Duration, Local, NaiveDate, Utc}; -use regex::{Error as RegexError, Regex}; use std::error::Error; use std::fmt::{self, Display}; +use chrono::{DateTime, FixedOffset}; + #[derive(Debug, PartialEq)] pub enum ParseDurationError { - InvalidRegex(RegexError), InvalidInput, } impl Display for ParseDurationError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - ParseDurationError::InvalidRegex(err) => { - write!(f, "Invalid regex for time pattern: {err}") - } ParseDurationError::InvalidInput => { write!( f, @@ -33,353 +26,6 @@ impl Display for ParseDurationError { impl Error for ParseDurationError {} -impl From for ParseDurationError { - fn from(err: RegexError) -> Self { - ParseDurationError::InvalidRegex(err) - } -} - -/// Parses a relative time string and returns a `Duration` representing the -/// relative time. -/// -/// # Arguments -/// -/// * `s` - A string slice representing the relative time. -/// -/// # Examples -/// -/// ``` -/// use chrono::Duration; -/// let duration = parse_datetime::from_str("+3 days"); -/// assert_eq!(duration.unwrap(), Duration::days(3)); -/// ``` -/// -/// # Supported formats -/// -/// The function supports the following formats for relative time: -/// -/// * `num` `unit` (e.g., "-1 hour", "+3 days") -/// * `unit` (e.g., "hour", "day") -/// * "now" or "today" -/// * "yesterday" -/// * "tomorrow" -/// * use "ago" for the past -/// -/// `[num]` can be a positive or negative integer. -/// [unit] can be one of the following: "fortnight", "week", "day", "hour", -/// "minute", "min", "second", "sec" and their plural forms. -/// -/// It is also possible to pass "1 hour 2 minutes" or "2 days and 2 hours" -/// -/// # Returns -/// -/// * `Ok(Duration)` - If the input string can be parsed as a relative time -/// * `Err(ParseDurationError)` - If the input string cannot be parsed as a relative time -/// -/// # Errors -/// -/// This function will return `Err(ParseDurationError::InvalidInput)` if the input string -/// cannot be parsed as a relative time. -/// -/// # Examples -/// -/// ``` -/// use chrono::Duration; -/// use parse_datetime::{from_str, ParseDurationError}; -/// -/// assert_eq!(from_str("1 hour, 30 minutes").unwrap(), Duration::minutes(90)); -/// assert_eq!(from_str("tomorrow").unwrap(), Duration::days(1)); -/// assert!(matches!(from_str("invalid"), Err(ParseDurationError::InvalidInput))); -/// ``` -pub fn from_str(s: &str) -> Result { - from_str_at_date(Utc::now().date_naive(), s) -} - -/// Parses a duration string and returns a `Duration` instance, with the duration -/// calculated from the specified date. -/// -/// # Arguments -/// -/// * `date` - A `Date` instance representing the base date for the calculation -/// * `s` - A string slice representing the relative time. -/// -/// # Errors -/// -/// This function will return `Err(ParseDurationError::InvalidInput)` if the input string -/// cannot be parsed as a relative time. -/// -/// # Examples -/// -/// ``` -/// use chrono::{Duration, NaiveDate, Utc, Local}; -/// use parse_datetime::{from_str_at_date, ParseDurationError}; -/// let today = Local::now().date().naive_local(); -/// let yesterday = today - Duration::days(1); -/// assert_eq!( -/// from_str_at_date(yesterday, "2 days").unwrap(), -/// Duration::days(1) // 1 day from the specified date + 1 day from the input string -/// ); -/// ``` -pub fn from_str_at_date(date: NaiveDate, s: &str) -> Result { - let time_pattern: Regex = Regex::new( - r"(?x) - (?:(?P[-+]?\d*)\s*)? - (\s*(?Pnext|last)?\s*)? - (?Pyears?|months?|fortnights?|weeks?|days?|hours?|h|minutes?|mins?|m|seconds?|secs?|s|yesterday|tomorrow|now|today) - (\s*(?Pand|,)?\s*)? - (\s*(?Pago)?)?", - )?; - - let mut total_duration = Duration::seconds(0); - let mut is_ago = s.contains(" ago"); - let mut captures_processed = 0; - let mut total_length = 0; - - for capture in time_pattern.captures_iter(s) { - captures_processed += 1; - - let value_str = capture - .name("value") - .ok_or(ParseDurationError::InvalidInput)? - .as_str(); - let value = if value_str.is_empty() { - 1 - } else { - value_str - .parse::() - .map_err(|_| ParseDurationError::InvalidInput)? - }; - - if let Some(direction) = capture.name("direction") { - if direction.as_str() == "last" { - is_ago = true; - } - } - - let unit = capture - .name("unit") - .ok_or(ParseDurationError::InvalidInput)? - .as_str(); - - if capture.name("ago").is_some() { - is_ago = true; - } - - let duration = match unit { - "years" | "year" => Duration::days(value * 365), - "months" | "month" => Duration::days(value * 30), - "fortnights" | "fortnight" => Duration::weeks(value * 2), - "weeks" | "week" => Duration::weeks(value), - "days" | "day" => Duration::days(value), - "hours" | "hour" | "h" => Duration::hours(value), - "minutes" | "minute" | "mins" | "min" | "m" => Duration::minutes(value), - "seconds" | "second" | "secs" | "sec" | "s" => Duration::seconds(value), - "yesterday" => Duration::days(-1), - "tomorrow" => Duration::days(1), - "now" | "today" => Duration::zero(), - _ => return Err(ParseDurationError::InvalidInput), - }; - let neg_duration = -duration; - total_duration = - match total_duration.checked_add(if is_ago { &neg_duration } else { &duration }) { - Some(duration) => duration, - None => return Err(ParseDurationError::InvalidInput), - }; - - // Calculate the total length of the matched substring - if let Some(m) = capture.get(0) { - total_length += m.end() - m.start(); - } - } - - // Check if the entire input string has been captured - if total_length != s.len() { - return Err(ParseDurationError::InvalidInput); - } - - if captures_processed == 0 { - Err(ParseDurationError::InvalidInput) - } else { - let time_now = Local::now().date_naive(); - let date_duration = date - time_now; - - Ok(total_duration + date_duration) - } -} - -#[cfg(test)] -mod tests { - - use super::ParseDurationError; - use super::{from_str, from_str_at_date}; - use chrono::{Duration, Local, NaiveDate}; - - #[test] - fn test_years() { - assert_eq!(from_str("1 year").unwrap(), Duration::seconds(31_536_000)); - assert_eq!( - from_str("-2 years").unwrap(), - Duration::seconds(-63_072_000) - ); - assert_eq!( - from_str("2 years ago").unwrap(), - Duration::seconds(-63_072_000) - ); - assert_eq!(from_str("year").unwrap(), Duration::seconds(31_536_000)); - } - - #[test] - fn test_months() { - assert_eq!(from_str("1 month").unwrap(), Duration::seconds(2_592_000)); - assert_eq!( - from_str("1 month and 2 weeks").unwrap(), - Duration::seconds(3_801_600) - ); - assert_eq!( - from_str("1 month and 2 weeks ago").unwrap(), - Duration::seconds(-3_801_600) - ); - assert_eq!(from_str("2 months").unwrap(), Duration::seconds(5_184_000)); - assert_eq!(from_str("month").unwrap(), Duration::seconds(2_592_000)); - } - - #[test] - fn test_fortnights() { - assert_eq!( - from_str("1 fortnight").unwrap(), - Duration::seconds(1_209_600) - ); - assert_eq!( - from_str("3 fortnights").unwrap(), - Duration::seconds(3_628_800) - ); - assert_eq!(from_str("fortnight").unwrap(), Duration::seconds(1_209_600)); - } - - #[test] - fn test_weeks() { - assert_eq!(from_str("1 week").unwrap(), Duration::seconds(604_800)); - assert_eq!( - from_str("1 week 3 days").unwrap(), - Duration::seconds(864_000) - ); - assert_eq!( - from_str("1 week 3 days ago").unwrap(), - Duration::seconds(-864_000) - ); - assert_eq!(from_str("-2 weeks").unwrap(), Duration::seconds(-1_209_600)); - assert_eq!( - from_str("2 weeks ago").unwrap(), - Duration::seconds(-1_209_600) - ); - assert_eq!(from_str("week").unwrap(), Duration::seconds(604_800)); - } - - #[test] - fn test_days() { - assert_eq!(from_str("1 day").unwrap(), Duration::seconds(86400)); - assert_eq!(from_str("2 days ago").unwrap(), Duration::seconds(-172_800)); - assert_eq!(from_str("-2 days").unwrap(), Duration::seconds(-172_800)); - assert_eq!(from_str("day").unwrap(), Duration::seconds(86400)); - } - - #[test] - fn test_hours() { - assert_eq!(from_str("1 hour").unwrap(), Duration::seconds(3600)); - assert_eq!(from_str("1 hour ago").unwrap(), Duration::seconds(-3600)); - assert_eq!(from_str("-2 hours").unwrap(), Duration::seconds(-7200)); - assert_eq!(from_str("hour").unwrap(), Duration::seconds(3600)); - } - - #[test] - fn test_minutes() { - assert_eq!(from_str("1 minute").unwrap(), Duration::seconds(60)); - assert_eq!(from_str("2 minutes").unwrap(), Duration::seconds(120)); - assert_eq!(from_str("min").unwrap(), Duration::seconds(60)); - } - - #[test] - fn test_seconds() { - assert_eq!(from_str("1 second").unwrap(), Duration::seconds(1)); - assert_eq!(from_str("2 seconds").unwrap(), Duration::seconds(2)); - assert_eq!(from_str("sec").unwrap(), Duration::seconds(1)); - } - - #[test] - fn test_relative_days() { - assert_eq!(from_str("now").unwrap(), Duration::seconds(0)); - assert_eq!(from_str("today").unwrap(), Duration::seconds(0)); - assert_eq!(from_str("yesterday").unwrap(), Duration::seconds(-86400)); - assert_eq!(from_str("tomorrow").unwrap(), Duration::seconds(86400)); - } - - #[test] - fn test_no_spaces() { - assert_eq!(from_str("-1hour").unwrap(), Duration::hours(-1)); - assert_eq!(from_str("+3days").unwrap(), Duration::days(3)); - assert_eq!(from_str("2weeks").unwrap(), Duration::weeks(2)); - assert_eq!( - from_str("2weeks 1hour").unwrap(), - Duration::seconds(1_213_200) - ); - assert_eq!( - from_str("2weeks 1hour ago").unwrap(), - Duration::seconds(-1_213_200) - ); - assert_eq!(from_str("+4months").unwrap(), Duration::days(4 * 30)); - assert_eq!(from_str("-2years").unwrap(), Duration::days(-2 * 365)); - assert_eq!(from_str("15minutes").unwrap(), Duration::minutes(15)); - assert_eq!(from_str("-30seconds").unwrap(), Duration::seconds(-30)); - assert_eq!(from_str("30seconds ago").unwrap(), Duration::seconds(-30)); - } - - #[test] - fn test_invalid_input() { - let result = from_str("foobar"); - println!("{result:?}"); - assert_eq!(result, Err(ParseDurationError::InvalidInput)); - - let result = from_str("invalid 1"); - assert_eq!(result, Err(ParseDurationError::InvalidInput)); - // Fails for now with a panic - /* let result = from_str("777777777777777771m"); - match result { - Err(ParseDurationError::InvalidInput) => assert!(true), - _ => assert!(false), - }*/ - } - - #[test] - fn test_from_str_at_date() { - let date = NaiveDate::from_ymd_opt(2014, 9, 5).unwrap(); - let now = Local::now().date_naive(); - let days_diff = (date - now).num_days(); - - assert_eq!( - from_str_at_date(date, "1 day").unwrap(), - Duration::days(days_diff + 1) - ); - - assert_eq!( - from_str_at_date(date, "2 hours").unwrap(), - Duration::days(days_diff) + Duration::hours(2) - ); - } - - #[test] - fn test_invalid_input_at_date() { - let date = NaiveDate::from_ymd_opt(2014, 9, 5).unwrap(); - assert!(matches!( - from_str_at_date(date, "invalid"), - Err(ParseDurationError::InvalidInput) - )); - } - - #[test] - fn test_direction() { - assert_eq!(from_str("last hour").unwrap(), Duration::seconds(-3600)); - assert_eq!(from_str("next year").unwrap(), Duration::days(365)); - assert_eq!(from_str("next week").unwrap(), Duration::days(7)); - assert_eq!(from_str("last month").unwrap(), Duration::days(-30)); - } +fn parse_datetime(s: &str) -> Result, ParseDurationError> { + todo!() } diff --git a/src/parse_datetime.rs b/src/parse_datetime.rs deleted file mode 100644 index 7b1f536..0000000 --- a/src/parse_datetime.rs +++ /dev/null @@ -1,279 +0,0 @@ -// For the full copyright and license information, please view the LICENSE -// file that was distributed with this source code. - -use chrono::{DateTime, FixedOffset, Local, LocalResult, NaiveDateTime, TimeZone}; - -use crate::ParseDurationError; - -/// Formats that parse input can take. -/// Taken from `touch` coreutils -mod format { - pub(crate) const ISO_8601: &str = "%Y-%m-%d"; - pub(crate) const ISO_8601_NO_SEP: &str = "%Y%m%d"; - pub(crate) const POSIX_LOCALE: &str = "%a %b %e %H:%M:%S %Y"; - pub(crate) const YYYYMMDDHHMM_DOT_SS: &str = "%Y%m%d%H%M.%S"; - pub(crate) const YYYYMMDDHHMMSS: &str = "%Y-%m-%d %H:%M:%S.%f"; - pub(crate) const YYYYMMDDHHMMS: &str = "%Y-%m-%d %H:%M:%S"; - pub(crate) const YYYY_MM_DD_HH_MM: &str = "%Y-%m-%d %H:%M"; - pub(crate) const YYYYMMDDHHMM: &str = "%Y%m%d%H%M"; - pub(crate) const YYYYMMDDHHMM_OFFSET: &str = "%Y%m%d%H%M %z"; - pub(crate) const YYYYMMDDHHMM_UTC_OFFSET: &str = "%Y%m%d%H%MUTC%z"; - pub(crate) const YYYYMMDDHHMM_ZULU_OFFSET: &str = "%Y%m%d%H%MZ%z"; - pub(crate) const YYYYMMDDHHMM_HYPHENATED_OFFSET: &str = "%Y-%m-%d %H:%M %z"; - pub(crate) const YYYYMMDDHHMMS_T_SEP: &str = "%Y-%m-%dT%H:%M:%S"; - pub(crate) const UTC_OFFSET: &str = "UTC%#z"; - pub(crate) const ZULU_OFFSET: &str = "Z%#z"; -} - -/// Loosely parses a time string and returns a `DateTime` representing the -/// absolute time of the string. -/// -/// # Arguments -/// -/// * `s` - A string slice representing the time. -/// -/// # Examples -/// -/// ``` -/// use chrono::{DateTime, Utc, TimeZone}; -/// let time = parse_datetime::parse_datetime::from_str("2023-06-03 12:00:01Z"); -/// assert_eq!(time.unwrap(), Utc.with_ymd_and_hms(2023, 06, 03, 12, 00, 01).unwrap()); -/// ``` -/// -/// # Supported formats -/// -/// The function supports the following formats for time: -/// -/// * ISO formats -/// * timezone offsets, e.g., "UTC-0100" -/// -/// # Returns -/// -/// * `Ok(DateTime)` - If the input string can be parsed as a time -/// * `Err(ParseDurationError)` - If the input string cannot be parsed as a relative time -/// -/// # Errors -/// -/// This function will return `Err(ParseDurationError::InvalidInput)` if the input string -/// cannot be parsed as a relative time. -/// -pub fn from_str + Clone>(s: S) -> Result, ParseDurationError> { - // TODO: Replace with a proper customiseable parsing solution using `nom`, `grmtools`, or - // similar - - // Formats with offsets don't require NaiveDateTime workaround - for fmt in [ - format::YYYYMMDDHHMM_OFFSET, - format::YYYYMMDDHHMM_HYPHENATED_OFFSET, - format::YYYYMMDDHHMM_UTC_OFFSET, - format::YYYYMMDDHHMM_ZULU_OFFSET, - ] { - if let Ok(parsed) = DateTime::parse_from_str(s.as_ref(), fmt) { - return Ok(parsed); - } - } - - // Parse formats with no offset, assume local time - for fmt in [ - format::YYYYMMDDHHMMS_T_SEP, - format::YYYYMMDDHHMM, - format::YYYYMMDDHHMMS, - format::YYYYMMDDHHMMSS, - format::YYYY_MM_DD_HH_MM, - format::YYYYMMDDHHMM_DOT_SS, - format::POSIX_LOCALE, - ] { - if let Ok(parsed) = NaiveDateTime::parse_from_str(s.as_ref(), fmt) { - if let Ok(dt) = naive_dt_to_fixed_offset(parsed) { - return Ok(dt); - } - } - } - - // Parse epoch seconds - if s.as_ref().bytes().next() == Some(b'@') { - if let Ok(parsed) = NaiveDateTime::parse_from_str(&s.as_ref()[1..], "%s") { - if let Ok(dt) = naive_dt_to_fixed_offset(parsed) { - return Ok(dt); - } - } - } - - let ts = s.as_ref().to_owned() + "0000"; - // Parse date only formats - assume midnight local timezone - for fmt in [format::ISO_8601, format::ISO_8601_NO_SEP] { - let f = fmt.to_owned() + "%H%M"; - if let Ok(parsed) = NaiveDateTime::parse_from_str(&ts, &f) { - if let Ok(dt) = naive_dt_to_fixed_offset(parsed) { - return Ok(dt); - } - } - } - - // Parse offsets. chrono doesn't provide any functionality to parse - // offsets, so instead we replicate parse_date behaviour by getting - // the current date with local, and create a date time string at midnight, - // before trying offset suffixes - let local = Local::now(); - let ts = format!("{}", local.format("%Y%m%d")) + "0000" + s.as_ref(); - for fmt in [format::UTC_OFFSET, format::ZULU_OFFSET] { - let f = format::YYYYMMDDHHMM.to_owned() + fmt; - if let Ok(parsed) = DateTime::parse_from_str(&ts, &f) { - return Ok(parsed); - } - } - - // Default parse and failure - s.as_ref() - .parse() - .map_err(|_| (ParseDurationError::InvalidInput)) -} - -// Convert NaiveDateTime to DateTime by assuming the offset -// is local time -fn naive_dt_to_fixed_offset(dt: NaiveDateTime) -> Result, ()> { - let now = Local::now(); - match now.offset().from_local_datetime(&dt) { - LocalResult::Single(dt) => Ok(dt), - _ => Err(()), - } -} - -#[cfg(test)] -mod tests { - static TEST_TIME: i64 = 1613371067; - - #[cfg(test)] - mod iso_8601 { - use std::env; - - use crate::{ - parse_datetime::from_str, parse_datetime::tests::TEST_TIME, ParseDurationError, - }; - - #[test] - fn test_t_sep() { - env::set_var("TZ", "UTC"); - let dt = "2021-02-15T06:37:47"; - let actual = from_str(dt); - assert_eq!(actual.unwrap().timestamp(), TEST_TIME); - } - - #[test] - fn test_space_sep() { - env::set_var("TZ", "UTC"); - let dt = "2021-02-15 06:37:47"; - let actual = from_str(dt); - assert_eq!(actual.unwrap().timestamp(), TEST_TIME); - } - - #[test] - fn test_space_sep_offset() { - env::set_var("TZ", "UTC"); - let dt = "2021-02-14 22:37:47 -0800"; - let actual = from_str(dt); - assert_eq!(actual.unwrap().timestamp(), TEST_TIME); - } - - #[test] - fn test_t_sep_offset() { - env::set_var("TZ", "UTC"); - let dt = "2021-02-14T22:37:47 -0800"; - let actual = from_str(dt); - assert_eq!(actual.unwrap().timestamp(), TEST_TIME); - } - - #[test] - fn invalid_formats() { - let invalid_dts = vec!["NotADate", "202104", "202104-12T22:37:47"]; - for dt in invalid_dts { - assert_eq!(from_str(dt), Err(ParseDurationError::InvalidInput)); - } - } - - #[test] - fn test_epoch_seconds() { - env::set_var("TZ", "UTC"); - let dt = "@1613371067"; - let actual = from_str(dt); - assert_eq!(actual.unwrap().timestamp(), TEST_TIME); - } - } - - #[cfg(test)] - mod offsets { - use chrono::Local; - - use crate::{parse_datetime::from_str, ParseDurationError}; - - #[test] - fn test_positive_offsets() { - let offsets = vec![ - "UTC+07:00", - "UTC+0700", - "UTC+07", - "Z+07:00", - "Z+0700", - "Z+07", - ]; - - let expected = format!("{}{}", Local::now().format("%Y%m%d"), "0000+0700"); - for offset in offsets { - let actual = from_str(offset).unwrap(); - assert_eq!(expected, format!("{}", actual.format("%Y%m%d%H%M%z"))); - } - } - - #[test] - fn test_partial_offset() { - let offsets = vec!["UTC+00:15", "UTC+0015", "Z+00:15", "Z+0015"]; - let expected = format!("{}{}", Local::now().format("%Y%m%d"), "0000+0015"); - for offset in offsets { - let actual = from_str(offset).unwrap(); - assert_eq!(expected, format!("{}", actual.format("%Y%m%d%H%M%z"))); - } - } - - #[test] - fn invalid_offset_format() { - let invalid_offsets = vec!["+0700", "UTC+2", "Z-1", "UTC+01005"]; - for offset in invalid_offsets { - assert_eq!(from_str(offset), Err(ParseDurationError::InvalidInput)); - } - } - } - - #[cfg(test)] - mod timestamp { - use crate::parse_datetime::from_str; - use chrono::{TimeZone, Utc}; - - #[test] - fn test_positive_offsets() { - let offsets: Vec = vec![ - 0, 1, 2, 10, 100, 150, 2000, 1234400000, 1334400000, 1692582913, 2092582910, - ]; - - for offset in offsets { - let time = Utc.timestamp(offset, 0); - let dt = from_str(format!("@{}", offset)); - assert_eq!(dt.unwrap(), time); - } - } - } - - /// Used to test example code presented in the README. - mod readme_test { - use crate::parse_datetime::from_str; - use chrono::{Local, TimeZone}; - - #[test] - fn test_readme_code() { - let dt = from_str("2021-02-14 06:37:47"); - assert_eq!( - dt.unwrap(), - Local.with_ymd_and_hms(2021, 2, 14, 6, 37, 47).unwrap() - ); - } - } -} diff --git a/tests/simple.rs b/tests/simple.rs index a538f9d..e69de29 100644 --- a/tests/simple.rs +++ b/tests/simple.rs @@ -1,148 +0,0 @@ -use chrono::{Duration, Utc}; -use parse_datetime::{from_str, from_str_at_date, ParseDurationError}; - -#[test] -fn test_invalid_input() { - let result = from_str("foobar"); - println!("{result:?}"); - assert_eq!(result, Err(ParseDurationError::InvalidInput)); - - let result = from_str("invalid 1"); - assert_eq!(result, Err(ParseDurationError::InvalidInput)); -} - -#[test] -fn test_duration_parsing() { - assert_eq!(from_str("1 year").unwrap(), Duration::seconds(31_536_000)); - assert_eq!( - from_str("-2 years").unwrap(), - Duration::seconds(-63_072_000) - ); - assert_eq!( - from_str("2 years ago").unwrap(), - Duration::seconds(-63_072_000) - ); - assert_eq!(from_str("year").unwrap(), Duration::seconds(31_536_000)); - - assert_eq!(from_str("1 month").unwrap(), Duration::seconds(2_592_000)); - assert_eq!( - from_str("1 month and 2 weeks").unwrap(), - Duration::seconds(3_801_600) - ); - assert_eq!( - from_str("1 month, 2 weeks").unwrap(), - Duration::seconds(3_801_600) - ); - assert_eq!( - from_str("1 months 2 weeks").unwrap(), - Duration::seconds(3_801_600) - ); - assert_eq!( - from_str("1 month and 2 weeks ago").unwrap(), - Duration::seconds(-3_801_600) - ); - assert_eq!(from_str("2 months").unwrap(), Duration::seconds(5_184_000)); - assert_eq!(from_str("month").unwrap(), Duration::seconds(2_592_000)); - - assert_eq!( - from_str("1 fortnight").unwrap(), - Duration::seconds(1_209_600) - ); - assert_eq!( - from_str("3 fortnights").unwrap(), - Duration::seconds(3_628_800) - ); - assert_eq!(from_str("fortnight").unwrap(), Duration::seconds(1_209_600)); - - assert_eq!(from_str("1 week").unwrap(), Duration::seconds(604_800)); - assert_eq!( - from_str("1 week 3 days").unwrap(), - Duration::seconds(864_000) - ); - assert_eq!( - from_str("1 week 3 days ago").unwrap(), - Duration::seconds(-864_000) - ); - assert_eq!(from_str("-2 weeks").unwrap(), Duration::seconds(-1_209_600)); - assert_eq!( - from_str("2 weeks ago").unwrap(), - Duration::seconds(-1_209_600) - ); - assert_eq!(from_str("week").unwrap(), Duration::seconds(604_800)); - - assert_eq!(from_str("1 day").unwrap(), Duration::seconds(86_400)); - assert_eq!(from_str("2 days ago").unwrap(), Duration::seconds(-172_800)); - assert_eq!(from_str("-2 days").unwrap(), Duration::seconds(-172_800)); - assert_eq!(from_str("day").unwrap(), Duration::seconds(86_400)); - - assert_eq!(from_str("1 hour").unwrap(), Duration::seconds(3_600)); - assert_eq!(from_str("1 h").unwrap(), Duration::seconds(3_600)); - assert_eq!(from_str("1 hour ago").unwrap(), Duration::seconds(-3_600)); - assert_eq!(from_str("-2 hours").unwrap(), Duration::seconds(-7_200)); - assert_eq!(from_str("hour").unwrap(), Duration::seconds(3_600)); - - assert_eq!(from_str("1 minute").unwrap(), Duration::seconds(60)); - assert_eq!(from_str("1 min").unwrap(), Duration::seconds(60)); - assert_eq!(from_str("2 minutes").unwrap(), Duration::seconds(120)); - assert_eq!(from_str("2 mins").unwrap(), Duration::seconds(120)); - assert_eq!(from_str("2m").unwrap(), Duration::seconds(120)); - assert_eq!(from_str("min").unwrap(), Duration::seconds(60)); - - assert_eq!(from_str("1 second").unwrap(), Duration::seconds(1)); - assert_eq!(from_str("1 s").unwrap(), Duration::seconds(1)); - assert_eq!(from_str("2 seconds").unwrap(), Duration::seconds(2)); - assert_eq!(from_str("2 secs").unwrap(), Duration::seconds(2)); - assert_eq!(from_str("2 sec").unwrap(), Duration::seconds(2)); - assert_eq!(from_str("sec").unwrap(), Duration::seconds(1)); - - assert_eq!(from_str("now").unwrap(), Duration::seconds(0)); - assert_eq!(from_str("today").unwrap(), Duration::seconds(0)); - - assert_eq!( - from_str("1 year 2 months 4 weeks 3 days and 2 seconds").unwrap(), - Duration::seconds(39_398_402) - ); - assert_eq!( - from_str("1 year 2 months 4 weeks 3 days and 2 seconds ago").unwrap(), - Duration::seconds(-39_398_402) - ); -} - -#[test] -#[should_panic] -fn test_display_parse_duration_error_through_from_str() { - let invalid_input = "9223372036854775807 seconds and 1 second"; - let _ = from_str(invalid_input).unwrap(); -} - -#[test] -fn test_display_should_fail() { - let invalid_input = "Thu Jan 01 12:34:00 2015"; - let error = from_str(invalid_input).unwrap_err(); - - assert_eq!( - format!("{error}"), - "Invalid input string: cannot be parsed as a relative time" - ); -} - -#[test] -fn test_from_str_at_date_day() { - let today = Utc::now().date_naive(); - let yesterday = today - Duration::days(1); - assert_eq!( - from_str_at_date(yesterday, "2 days").unwrap(), - Duration::days(1) - ); -} - -#[test] -fn test_invalid_input_at_date() { - let today = Utc::now().date_naive(); - let result = from_str_at_date(today, "foobar"); - println!("{result:?}"); - assert_eq!(result, Err(ParseDurationError::InvalidInput)); - - let result = from_str_at_date(today, "invalid 1r"); - assert_eq!(result, Err(ParseDurationError::InvalidInput)); -} From 299952166baeda7ed7fb27918ce48f98facd247c Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Fri, 26 Jan 2024 21:51:01 +0100 Subject: [PATCH 02/15] start parsing date with winnow --- Cargo.lock | 49 ++-------- Cargo.toml | 2 +- src/items/date.rs | 228 ++++++++++++++++++++++++++++++++++++++++++++++ src/items/mod.rs | 57 ++++++++++++ src/lib.rs | 22 +++-- 5 files changed, 311 insertions(+), 47 deletions(-) create mode 100644 src/items/date.rs create mode 100644 src/items/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 0d099a3..085c66f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,15 +2,6 @@ # It is not intended for manual editing. version = 3 -[[package]] -name = "aho-corasick" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04" -dependencies = [ - "memchr", -] - [[package]] name = "android-tzdata" version = "0.1.1" @@ -138,7 +129,7 @@ name = "parse_datetime" version = "0.4.0" dependencies = [ "chrono", - "regex", + "winnow", ] [[package]] @@ -159,35 +150,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "regex" -version = "1.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9aaecc05d5c4b5f7da074b9a0d1a0867e71fd36e7fc0482d8bcfe8e8fc56290" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846" - [[package]] name = "syn" version = "2.0.18" @@ -346,3 +308,12 @@ name = "windows_x86_64_msvc" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + +[[package]] +name = "winnow" +version = "0.5.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7cf47b659b318dccbd69cc4797a39ae128f533dce7902a1096044d1967b9c16" +dependencies = [ + "memchr", +] diff --git a/Cargo.toml b/Cargo.toml index 23f5f4f..c9c14e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,5 +8,5 @@ repository = "https://github.com/uutils/parse_datetime" readme = "README.md" [dependencies] -regex = "1.9" chrono = { version="0.4", default-features=false, features=["std", "alloc", "clock"] } +winnow = "0.5.34" diff --git a/src/items/date.rs b/src/items/date.rs new file mode 100644 index 0000000..12fe924 --- /dev/null +++ b/src/items/date.rs @@ -0,0 +1,228 @@ +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +// spell-checker:ignore multispace + +//! Parse a date item (without time component) +//! +//! The GNU docs say: +//! +//! > A calendar date item specifies a day of the year. It is specified +//! > differently, depending on whether the month is specified numerically +//! > or literally. +//! > +//! > ... +//! > +//! > For numeric months, the ISO 8601 format ‘year-month-day’ is allowed, +//! > where year is any positive number, month is a number between 01 and +//! > 12, and day is a number between 01 and 31. A leading zero must be +//! > present if a number is less than ten. If year is 68 or smaller, then +//! > 2000 is added to it; otherwise, if year is less than 100, then 1900 +//! > is added to it. The construct ‘month/day/year’, popular in the United +//! > States, is accepted. Also ‘month/day’, omitting the year. +//! > +//! > Literal months may be spelled out in full: ‘January’, ‘February’, +//! > ‘March’, ‘April’, ‘May’, ‘June’, ‘July’, ‘August’, ‘September’, +//! > ‘October’, ‘November’ or ‘December’. Literal months may be +//! > abbreviated to their first three letters, possibly followed by an +//! > abbreviating dot. It is also permitted to write ‘Sept’ instead of +//! > ‘September’. + +use winnow::{ + ascii::{alpha1, dec_uint, multispace0}, + combinator::{alt, opt, preceded}, + seq, + token::take, + PResult, Parser, +}; + +use crate::ParseDateTimeError; + +#[derive(PartialEq, Eq, Debug)] +pub struct Date { + day: u32, + month: u32, + year: Option, +} + +pub fn parse(input: &mut &str) -> PResult { + alt((iso, us, literal1, literal2)).parse_next(input) +} + +/// Parse `YYYY-MM-DD` or `YY-MM-DD` +fn iso(input: &mut &str) -> PResult { + seq!(Date { + year: year.map(Some), + _: (multispace0, '-', multispace0), + month: month, + _: (multispace0, '-', multispace0), + day: day, + }) + .parse_next(input) +} + +/// Parse `MM/DD/YYYY`, `MM/DD/YY` or `MM/DD` +fn us(input: &mut &str) -> PResult { + seq!(Date { + month: month, + _: (multispace0, '/', multispace0), + day: day, + year: opt(preceded((multispace0, '/', multispace0), year)), + }) + .parse_next(input) +} + +/// Parse `14 November 2022`, `14 Nov 2022`, "14nov2022", "14-nov-2022", "14-nov2022", "14nov-2022" +fn literal1(input: &mut &str) -> PResult { + seq!(Date { + day: day, + _: (multispace0, opt('-'), multispace0), + month: literal_month, + year: opt(preceded((multispace0, opt('-'), multispace0), year)), + }) + .parse_next(input) +} + +/// Parse `November 14, 2022` and `Nov 14, 2022` +fn literal2(input: &mut &str) -> PResult { + seq!(Date { + month: literal_month, + _: multispace0, + day: day, + // FIXME: GNU requires _some_ space between the day and the year, + // probably to distinguish with floats. + year: opt(preceded((multispace0, ",", multispace0), year)), + }) + .parse_next(input) +} + +fn year(input: &mut &str) -> PResult { + alt(( + take(4usize).try_map(|x: &str| x.parse()), + take(3usize).try_map(|x: &str| x.parse()), + take(2usize).try_map(|x: &str| x.parse()).map( + |x: u32| { + if x <= 68 { + x + 2000 + } else { + x + 1900 + } + }, + ), + )) + .parse_next(input) +} + +fn month(input: &mut &str) -> PResult { + dec_uint + .try_map(|x| { + (x >= 1 && x <= 12) + .then_some(x) + .ok_or(ParseDateTimeError::InvalidInput) + }) + .parse_next(input) +} + +fn day(input: &mut &str) -> PResult { + dec_uint + .try_map(|x| { + (x >= 1 && x <= 31) + .then_some(x) + .ok_or(ParseDateTimeError::InvalidInput) + }) + .parse_next(input) +} + +/// Parse the name of a month (case-insensitive) +fn literal_month(input: &mut &str) -> PResult { + alpha1 + .try_map(|s: &str| { + let s = s.to_ascii_lowercase(); + let month = match s.as_ref() { + "january" | "jan" => 1, + "february" | "feb" => 2, + "march" | "mar" => 3, + "april" | "apr" => 4, + "may" => 5, + "june" | "jun" => 6, + "july" | "jul" => 7, + "august" | "aug" => 8, + "september" | "sep" | "sept" => 9, + "october" | "oct" => 10, + "november" | "nov" => 11, + "december" | "dec" => 12, + _ => return Err(ParseDateTimeError::InvalidInput), + }; + Ok(month) + }) + .parse_next(input) +} + +#[cfg(test)] +mod test { + use super::{parse, Date}; + + // Test cases from the GNU docs: + // + // ``` + // 2022-11-14 # ISO 8601. + // 22-11-14 # Assume 19xx for 69 through 99, + // # 20xx for 00 through 68 (not recommended). + // 11/14/2022 # Common U.S. writing. + // 14 November 2022 + // 14 Nov 2022 # Three-letter abbreviations always allowed. + // November 14, 2022 + // 14-nov-2022 + // 14nov2022 + // ``` + + #[test] + fn with_year() { + let reference = Date { + year: Some(2022), + month: 11, + day: 14, + }; + + for mut s in [ + "2022-11-14", + "2022 - 11 - 14", + "22-11-14", + "11/14/2022", + "11 / 14 / 2022", + "11/14/22", + "14 November 2022", + "14 Nov 2022", + "November 14, 2022", + "November 14 , 2022", + "Nov 14, 2022", + "14-nov-2022", + "14nov2022", + "14nov 2022", + "NoVeMbEr 14, 2022", + ] { + let old_s = s.to_owned(); + assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}"); + } + } + + #[test] + fn no_year() { + let reference = Date { + year: None, + month: 11, + day: 14, + }; + for mut s in [ + "11/14", + "14 November", + "14 Nov", + "November 14", + "Nov 14", + "14-nov", + "14nov", + ] { + assert_eq!(parse(&mut s).unwrap(), reference); + } + } +} diff --git a/src/items/mod.rs b/src/items/mod.rs new file mode 100644 index 0000000..c6d780d --- /dev/null +++ b/src/items/mod.rs @@ -0,0 +1,57 @@ +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +// spell-checker:ignore chrono + +//! From the GNU docs: +//! +//! > A date is a string, possibly empty, containing many items separated by +//! > whitespace. The whitespace may be omitted when no ambiguity arises. The +//! > empty string means the beginning of today (i.e., midnight). Order of the +//! > items is immaterial. A date string may contain many flavors of items: +//! > - calendar date items +//! > - time of day items +//! > - time zone items +//! > - combined date and time of day items +//! > - day of the week items +//! > - relative items +//! > - pure numbers. +//! +//! We put all of those in separate modules: +//! - [`date`] +//! - [`time`] +//! - [`time_zone`] +//! - [`combined`] +//! - [`weekday`] +//! - [`relative`] +//! - [`number] + +use chrono::{NaiveDateTime, NaiveTime, Weekday}; +use winnow::{combinator::alt, PResult, Parser}; +mod date; + +pub enum Item { + Date(date::Date), + TimeOfDay(NaiveTime), + _TimeZone, + Combined(NaiveDateTime), + Weekday(Weekday), + _Relative, + _PureNumber, +} + +pub fn parse(input: &mut &str) -> PResult { + alt((date::parse.map(Item::Date),)).parse_next(input) +} + +mod time {} + +mod time_zone {} + +mod combined {} + +mod weekday {} + +mod relative {} + +mod number {} diff --git a/src/lib.rs b/src/lib.rs index ea6e6d5..4ff0a91 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,20 +1,25 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +// spell-checker:ignore datetime + use std::error::Error; use std::fmt::{self, Display}; -use chrono::{DateTime, FixedOffset}; +use items::Item; +use winnow::Parser; + +mod items; #[derive(Debug, PartialEq)] -pub enum ParseDurationError { +pub enum ParseDateTimeError { InvalidInput, } -impl Display for ParseDurationError { +impl Display for ParseDateTimeError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - ParseDurationError::InvalidInput => { + ParseDateTimeError::InvalidInput => { write!( f, "Invalid input string: cannot be parsed as a relative time" @@ -24,8 +29,11 @@ impl Display for ParseDurationError { } } -impl Error for ParseDurationError {} +impl Error for ParseDateTimeError {} -fn parse_datetime(s: &str) -> Result, ParseDurationError> { - todo!() +pub fn parse_datetime(mut input: &str) -> Result { + match items::parse.parse_next(&mut input) { + Ok(x) => Ok(x), + Err(_) => Err(ParseDateTimeError::InvalidInput), + } } From 176516e410e84da32f1954d982567a53ce5d2e98 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Sat, 27 Jan 2024 00:33:04 +0100 Subject: [PATCH 03/15] start parsing time with winnow --- src/items/mod.rs | 29 +++- src/items/time.rs | 386 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 408 insertions(+), 7 deletions(-) create mode 100644 src/items/time.rs diff --git a/src/items/mod.rs b/src/items/mod.rs index c6d780d..f262f1d 100644 --- a/src/items/mod.rs +++ b/src/items/mod.rs @@ -1,7 +1,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore chrono +// spell-checker:ignore chrono multispace0 //! From the GNU docs: //! @@ -26,13 +26,20 @@ //! - [`relative`] //! - [`number] -use chrono::{NaiveDateTime, NaiveTime, Weekday}; -use winnow::{combinator::alt, PResult, Parser}; +use chrono::{NaiveDateTime, Weekday}; +use winnow::{ + ascii::multispace0, + combinator::{alt, preceded}, + error::ParserError, + stream::{AsChar, Stream, StreamIsPartial}, + PResult, Parser, +}; mod date; +mod time; pub enum Item { Date(date::Date), - TimeOfDay(NaiveTime), + Time(time::Time), _TimeZone, Combined(NaiveDateTime), Weekday(Weekday), @@ -40,11 +47,19 @@ pub enum Item { _PureNumber, } -pub fn parse(input: &mut &str) -> PResult { - alt((date::parse.map(Item::Date),)).parse_next(input) +/// Allow spaces after a parser +fn s(p: impl Parser) -> impl Parser +where + I: StreamIsPartial + Stream, + ::Token: AsChar + Clone, + E: ParserError, +{ + preceded(multispace0, p) } -mod time {} +pub fn parse(input: &mut &str) -> PResult { + alt((date::parse.map(Item::Date), time::parse.map(Item::Time))).parse_next(input) +} mod time_zone {} diff --git a/src/items/time.rs b/src/items/time.rs new file mode 100644 index 0000000..49ad76c --- /dev/null +++ b/src/items/time.rs @@ -0,0 +1,386 @@ +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +// spell-checker:ignore multispace Caseless shhmm colonless + +//! Parse a time item (without a date) +//! +//! The GNU docs state: +//! +//! > More generally, the time of day may be given as ‘hour:minute:second’, +//! > where hour is a number between 0 and 23, minute is a number between 0 and +//! > 59, and second is a number between 0 and 59 possibly followed by ‘.’ or +//! > ‘,’ and a fraction containing one or more digits. Alternatively, +//! > ‘:second’ can be omitted, in which case it is taken to be zero. On the +//! > rare hosts that support leap seconds, second may be 60. +//! > +//! > If the time is followed by ‘am’ or ‘pm’ (or ‘a.m.’ or ‘p.m.’), hour is +//! > restricted to run from 1 to 12, and ‘:minute’ may be omitted (taken to be +//! > zero). ‘am’ indicates the first half of the day, ‘pm’ indicates the +//! > second half of the day. In this notation, 12 is the predecessor of 1: +//! > midnight is ‘12am’ while noon is ‘12pm’. (This is the zero-oriented +//! > interpretation of ‘12am’ and ‘12pm’, as opposed to the old tradition +//! > derived from Latin which uses ‘12m’ for noon and ‘12pm’ for midnight.) +//! > +//! > The time may alternatively be followed by a time zone correction, +//! > expressed as ‘shhmm’, where s is ‘+’ or ‘-’, hh is a number of zone hours +//! > and mm is a number of zone minutes. The zone minutes term, mm, may be +//! > omitted, in which case the one- or two-digit correction is interpreted as +//! > a number of hours. You can also separate hh from mm with a colon. When a +//! > time zone correction is given this way, it forces interpretation of the +//! > time relative to Coordinated Universal Time (UTC), overriding any +//! > previous specification for the time zone or the local time zone. For +//! > example, ‘+0530’ and ‘+05:30’ both stand for the time zone 5.5 hours +//! > ahead of UTC (e.g., India). This is the best way to specify a time zone +//! > correction by fractional parts of an hour. The maximum zone correction is +//! > 24 hours. +//! > +//! > Either ‘am’/‘pm’ or a time zone correction may be specified, but not both. + +use winnow::{ + ascii::{dec_uint, float, Caseless}, + combinator::{alt, opt, preceded}, + seq, + stream::AsChar, + token::take_while, + PResult, Parser, +}; + +use super::s; + +#[derive(PartialEq, Debug)] +pub struct Time { + hour: i32, + minute: i32, + second: f64, + offset: Option, +} + +#[derive(PartialEq, Debug, Clone)] +pub struct Offset { + negative: bool, + hours: u32, + minutes: u32, +} + +#[derive(Clone)] +enum Suffix { + Am, + Pm, + TimeZone(Offset), +} + +pub fn parse(input: &mut &str) -> PResult