diff --git a/Cargo.lock b/Cargo.lock index 8cd1830..a9b25e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "aho-corasick" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04" +checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" dependencies = [ "memchr", ] @@ -70,9 +70,9 @@ checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "iana-time-zone" -version = "0.1.56" +version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c" +checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -93,24 +93,24 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.63" +version = "0.3.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f37a4a5928311ac501dee68b3c7613a1037d0edb30c8e5427bd832d55d1b790" +checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" dependencies = [ "wasm-bindgen", ] [[package]] name = "libc" -version = "0.2.144" +version = "0.2.146" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" +checksum = "f92be4933c13fd498862a9e02a3055f8a8d9c039ce33db97306fd5a6caa7f29b" [[package]] name = "log" -version = "0.4.18" +version = "0.4.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "518ef76f2f87365916b142844c16d8fefd85039bc5699050210a7778ee1cd1de" +checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" [[package]] name = "memchr" @@ -118,6 +118,22 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num-traits" version = "0.2.15" @@ -138,14 +154,15 @@ name = "parse_datetime" version = "0.4.0" dependencies = [ "chrono", + "nom", "regex", ] [[package]] name = "proc-macro2" -version = "1.0.59" +version = "1.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b" +checksum = "dec2b086b7a862cf4de201096214fa870344cf922b2b30c167badb3af3195406" dependencies = [ "unicode-ident", ] @@ -161,9 +178,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.8.1" +version = "1.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af83e617f331cc6ae2da5443c602dfa5af81e517212d9d611a5b3ba1777b5370" +checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f" dependencies = [ "aho-corasick", "memchr", @@ -172,9 +189,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c" +checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" [[package]] name = "syn" @@ -195,9 +212,9 @@ checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" [[package]] name = "wasm-bindgen" -version = "0.2.86" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bba0e8cb82ba49ff4e229459ff22a191bbe9a1cb3a341610c9c33efc27ddf73" +checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -205,9 +222,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.86" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b04bc93f9d6bdee709f6bd2118f57dd6679cf1176a1af464fca3ab0d66d8fb" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" dependencies = [ "bumpalo", "log", @@ -220,9 +237,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.86" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14d6b024f1a526bb0234f52840389927257beb670610081360e5a03c5df9c258" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -230,9 +247,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.86" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", @@ -243,9 +260,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.86" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93" +checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" [[package]] name = "winapi" diff --git a/Cargo.toml b/Cargo.toml index d571e37..e61d337 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,3 +10,4 @@ readme = "README.md" [dependencies] regex = "1.8" chrono = { version="0.4", default-features=false, features=["std", "alloc", "clock"] } +nom = "7.1" \ No newline at end of file diff --git a/fuzz/fuzz_targets/from_str.rs b/fuzz/fuzz_targets/from_str.rs index 63b55d1..53bfbd1 100644 --- a/fuzz/fuzz_targets/from_str.rs +++ b/fuzz/fuzz_targets/from_str.rs @@ -1,3 +1,6 @@ +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + #![no_main] use libfuzzer_sys::fuzz_target; diff --git a/fuzz/fuzz_targets/parse_datetime_from_str.rs b/fuzz/fuzz_targets/parse_datetime_from_str.rs index 7d285e5..bf65156 100644 --- a/fuzz/fuzz_targets/parse_datetime_from_str.rs +++ b/fuzz/fuzz_targets/parse_datetime_from_str.rs @@ -1,3 +1,6 @@ +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + #![no_main] use libfuzzer_sys::fuzz_target; diff --git a/src/lib.rs b/src/lib.rs index 8a104da..f53ed2e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,6 +3,7 @@ // Expose parse_datetime pub mod parse_datetime; +pub mod parse_items; use chrono::{Duration, Local, NaiveDate, Utc}; use regex::{Error as RegexError, Regex}; diff --git a/src/parse_items.rs b/src/parse_items.rs new file mode 100644 index 0000000..b4bbb3a --- /dev/null +++ b/src/parse_items.rs @@ -0,0 +1,46 @@ +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use nom::error::Error; +use nom::{IResult, Parser}; + +pub mod items; +pub(self) mod fixed_number; +pub(self) mod nano_seconds; + +type PError<'i> = Error<&'i str>; +type PResult<'i, O> = IResult<&'i str, O, PError<'i>>; + +fn singleton_list<'i, O>(mut inner: impl Parser<&'i str, O, PError<'i>>) -> impl Parser<&'i str, Vec, PError<'i>> { + move |input: &'i str| { + let (tail, result) = inner.parse(input)?; + Ok((tail, vec![result])) + } +} + +#[cfg(test)] +mod tests { + macro_rules! ptest { + ($name:ident : $parser:ident($input:literal) => $out:expr, $tail:literal) => { + #[test] + fn $name() { + assert_eq!( + $parser.parse($input), + Ok(( + $tail, + $out + )) + ); + } + }; + ($name:ident : $parser:ident($input:literal) => X) => { + #[test] + fn $name() { + let result = $parser.parse($input); + assert!(result.is_err(), "{:?}", result); + } + }; + } + + pub(super) use ptest; +} \ No newline at end of file diff --git a/src/parse_items/fixed_number.rs b/src/parse_items/fixed_number.rs new file mode 100644 index 0000000..8d6d5fb --- /dev/null +++ b/src/parse_items/fixed_number.rs @@ -0,0 +1,72 @@ +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use nom::combinator::all_consuming; +use nom::{bytes::complete::take, character::complete, combinator::map_parser, Parser}; + +use crate::parse_items::PError; + +macro_rules! fixed_number_impl { + ($($t:ident),+) => {$( + #[allow(dead_code)] + pub fn $t<'i>(width: usize) -> impl Parser<&'i str, $t, PError<'i>> { + move |input: &'i str| { + map_parser(take(width), all_consuming(complete::$t)).parse(input) + } + } + )+}; + } + +fixed_number_impl! { u8, u16, u32, u64, u128 } + +#[cfg(test)] +mod tests { + use crate::parse_items::{tests::ptest, PResult}; + + use super::*; + + #[test] + fn zero_width() { + let result = u32(0).parse("1234"); + assert!(result.is_err(), "{:?}", result); + } + + #[test] + fn one_width() { + assert_eq!(u32(1).parse("1234"), Ok(("234", 1))); + } + + #[test] + fn does_not_fit_type() { + let result = u8(4).parse("1234"); + assert!(result.is_err(), "{:?}", result); + } + + #[test] + fn does_not_fit_negative() { + let result = u8(3).parse("-123"); + assert!(result.is_err(), "{:?}", result); + } + + #[test] + fn input_too_short() { + let result = u32(6).parse("1234"); + assert!(result.is_err(), "{:?}", result); + } + + #[test] + fn three() { + assert_eq!(u32(3).parse("123abc"), Ok(("abc", 123))); + } + + #[test] + fn leading_zeroes() { + assert_eq!(u32(3).parse("00123"), Ok(("23", 1))); + } + + #[test] + fn non_digits() { + let result = u32(4).parse("123abc"); + assert!(result.is_err(), "{:?}", result); + } +} diff --git a/src/parse_items/gnu-items.md b/src/parse_items/gnu-items.md new file mode 100644 index 0000000..704e8a6 --- /dev/null +++ b/src/parse_items/gnu-items.md @@ -0,0 +1,27 @@ + + +## General date syntax +https://www.gnu.org/software/coreutils/manual/html_node/General-date-syntax.html + +A date string can have different flavours (items): +- calendar date +- time of day +- time zone +- combined date and time of day +- day of the week +- relative +- numbers +- empty string (beginning of the day) + +Some properties: +- the order of items should not matter +- whitespace may be omitted when unambiguous +- ordinal numbers may be written out in some items +- comments between parentheses '(', ')' +- alphabetic case is ignored +- hyphens not followed by digit are ignored +- leading zeros on numbers are ignored +- leap seconds on supported systems diff --git a/src/parse_items/items.rs b/src/parse_items/items.rs new file mode 100644 index 0000000..d7425be --- /dev/null +++ b/src/parse_items/items.rs @@ -0,0 +1,112 @@ +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use nom::branch::alt; +use nom::character::complete::space0; +use nom::combinator::all_consuming; +use nom::sequence::preceded; +use nom::Parser; + +use crate::parse_items::items::calendar_day::{ + calendar_day, month_day, RawCalendarDay, RawMonthDay, +}; +use crate::parse_items::items::seconds_epoch::{seconds_epoch, SecondsEpoch}; +use crate::parse_items::items::time_of_day::{time_of_day, RawTimeOfDay, TimeZoneCorrection}; +use crate::parse_items::items::tz_identifier::{tz_identifier, TzIdentifier}; +use crate::parse_items::{PError, PResult}; + +pub mod calendar_day; +pub mod seconds_epoch; +pub mod time_of_day; +pub mod tz_identifier; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum Item { + SecondsEpoch(SecondsEpoch), + TimeZoneRule(TzIdentifier), + CalendarDay(RawCalendarDay), // replace NaiveDay? + MonthDay(RawMonthDay), // replace ? + TimeOfDay(RawTimeOfDay), // replace NaiveTime? + TimeZoneCorrection(TimeZoneCorrection), +} + +fn single_parser(input: &str) -> PResult> { + preceded( + space0, + alt(( + tz_identifier, + seconds_epoch, + calendar_day, + month_day, + time_of_day, + )), + ) + .parse(input) +} + +pub fn parse(mut input: &str) -> Result, PError> { + let mut all_items = vec![]; + loop { + let (tail, items) = single_parser(input).map_err(|err| match err { + nom::Err::Error(e) => e, + nom::Err::Failure(e) => e, + nom::Err::Incomplete(_) => panic!("Should only use complete parsers"), + })?; + + for item in items { + all_items.push(item) + } + input = tail; + + if all_consuming(space0::<&str, PError>).parse(input).is_ok() { + break Ok(all_items); + } + } +} + +#[cfg(test)] +mod tests { + use crate::parse_items::items::calendar_day::{RawCalendarDay, RawMonthDay}; + use crate::parse_items::items::seconds_epoch::SecondsEpoch; + use crate::parse_items::items::time_of_day::{RawTimeOfDay, TimeZoneCorrection}; + use crate::parse_items::items::tz_identifier::TzIdentifier; + use crate::parse_items::items::{parse, Item}; + + #[test] + fn some_items() { + let result = + parse("TZ=\"Europe/Amsterdam\" @123.456 14nov2022 11/14 12:34:56.789123456+01:30 11pm"); + assert_eq!( + result, + Ok(vec![ + Item::TimeZoneRule(TzIdentifier("Europe/Amsterdam".to_owned())), + Item::SecondsEpoch(SecondsEpoch { + seconds: 123, + nanoseconds: 456000000 + }), + Item::CalendarDay(RawCalendarDay { + year: 2022, + month: 11, + day: 14, + }), + Item::MonthDay(RawMonthDay { month: 11, day: 14 }), + Item::TimeOfDay(RawTimeOfDay { + hours: 12, + minutes: 34, + seconds: 56, + nanoseconds: 789123456, + }), + Item::TimeZoneCorrection(TimeZoneCorrection { + hours: 1, + minutes: 30, + }), + Item::TimeOfDay(RawTimeOfDay { + hours: 23, + minutes: 0, + seconds: 0, + nanoseconds: 0, + }), + ]) + ) + } +} diff --git a/src/parse_items/items/calendar_day.rs b/src/parse_items/items/calendar_day.rs new file mode 100644 index 0000000..270504a --- /dev/null +++ b/src/parse_items/items/calendar_day.rs @@ -0,0 +1,236 @@ +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use nom::branch::alt; +use nom::bytes::complete::{tag, tag_no_case}; +use nom::character::complete; +use nom::character::complete::{digit1, space0, space1}; +use nom::combinator::{consumed, map, not, opt, peek, value}; +use nom::sequence::{delimited, preceded, terminated, tuple}; +use nom::Parser; + +use crate::parse_items::items::Item; +use crate::parse_items::singleton_list; +use crate::parse_items::{fixed_number, PResult}; + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct RawCalendarDay { + pub day: u8, + pub month: u8, + pub year: u16, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub struct RawMonthDay { + pub day: u8, + pub month: u8, +} + +pub fn calendar_day(input: &str) -> PResult> { + singleton_list(map(raw_calendar_day, |calendar_day: RawCalendarDay| { + Item::CalendarDay(calendar_day) + })) + .parse(input) +} + +fn raw_calendar_day(input: &str) -> PResult { + alt((iso_gnu, us_format, letter, letter_us)).parse(input) +} + +pub fn month_day(input: &str) -> PResult> { + singleton_list(map(raw_month_day, Item::MonthDay)).parse(input) +} + +fn raw_month_day(input: &str) -> PResult { + let (tail, (month, day)) = alt(( + tuple((complete::u8, preceded(space0_slash, complete::u8))), + tuple((month, preceded(space0, complete::u8))), + map( + tuple((complete::u8, preceded(space0, month))), + |(day, month)| (month, day), + ), + )) + .parse(input)?; + + Ok((tail, RawMonthDay { month, day })) +} + +fn space0_dash(input: &str) -> PResult<&str> { + let (tail, (consumed, _)) = consumed(delimited(space0, tag("-"), space0)).parse(input)?; + Ok((tail, consumed)) +} + +fn year_wo_century(input: &str) -> PResult { + let (tail, year) = fixed_number::u8(2).parse(input)?; + let year = year as u16 + if year <= 68 { 2000 } else { 1900 }; + Ok((tail, year)) +} + +fn iso_gnu(input: &str) -> PResult { + let (input, (year, month, day)) = tuple(( + alt(( + terminated(year_wo_century, space0_dash), + terminated(complete::u16, space0_dash), + )), + terminated(complete::u8, space0_dash), + complete::u8, + )) + .parse(input)?; + + Ok((input, RawCalendarDay { day, month, year })) +} + +fn space0_slash(input: &str) -> PResult<&str> { + let (tail, (consumed, _)) = consumed(delimited(space0, tag("/"), space0)).parse(input)?; + Ok((tail, consumed)) +} + +fn year(input: &str) -> PResult { + alt(( + terminated(year_wo_century, peek(not(digit1))), + complete::u16, + )) + .parse(input) +} + +fn us_format(input: &str) -> PResult { + let (input, (month, day, year)) = tuple(( + terminated(complete::u8, space0_slash), + terminated(complete::u8, space0_slash), + year, + )) + .parse(input)?; + + Ok((input, RawCalendarDay { day, month, year })) +} + +fn month(input: &str) -> PResult { + alt(( + value(1, alt((tag_no_case("january"), tag_no_case("jan")))), + value(2, alt((tag_no_case("february"), tag_no_case("feb")))), + value(3, alt((tag_no_case("march"), tag_no_case("mar")))), + value(4, alt((tag_no_case("april"), tag_no_case("apr")))), + value(5, tag_no_case("may")), + value(6, alt((tag_no_case("june"), tag_no_case("jun")))), + value(7, alt((tag_no_case("july"), tag_no_case("jul")))), + value(8, alt((tag_no_case("august"), tag_no_case("aug")))), + value( + 9, + alt(( + tag_no_case("september"), + tag_no_case("sept"), + tag_no_case("sep"), + )), + ), + value(10, alt((tag_no_case("october"), tag_no_case("oct")))), + value(11, alt((tag_no_case("november"), tag_no_case("nov")))), + value(12, alt((tag_no_case("december"), tag_no_case("dec")))), + )) + .parse(input) +} + +fn space0_opt_dash(input: &str) -> PResult<&str> { + let (tail, (consumed, _)) = consumed(tuple((space0, opt(tag("-")), space0))).parse(input)?; + Ok((tail, consumed)) +} + +fn letter(input: &str) -> PResult { + let (tail, (day, month, year)) = tuple(( + terminated(complete::u8, space0_opt_dash), + terminated(month, space0_opt_dash), + year, + )) + .parse(input)?; + + Ok((tail, RawCalendarDay { day, month, year })) +} + +fn letter_us(input: &str) -> PResult { + let (tail, (month, day, year)) = tuple(( + terminated(month, space0_opt_dash), + terminated( + complete::u8, + alt((delimited(space0, tag(","), space1), space0_opt_dash)), + ), + year, + )) + .parse(input)?; + + Ok((tail, RawCalendarDay { day, month, year })) +} + +#[cfg(test)] +mod tests { + use nom::Parser; + + use crate::parse_items::tests::ptest; + + use super::*; + + macro_rules! cd { + ($name:ident : $input:literal => $year:literal-$month:literal-$day:literal + $tail:literal) => { + ptest! { $name : raw_calendar_day($input) => RawCalendarDay { year: $year, month: $month, day: $day }, $tail } + }; + ($name:ident : $input:literal => X) => { + ptest! { $name : raw_calendar_day($input) => X } + }; + } + + cd! { iso_like : "23-45-67abc" => 2023-45-67 + "abc" } + cd! { iso_like_2000 : "68-45-67abc" => 2068-45-67 + "abc" } + cd! { iso_like_1900 : "69-45-67abc" => 1969-45-67 + "abc" } + cd! { us : "34/12/5678abc" => 5678-34-12 + "abc" } + cd! { us_short_year : "34/12/56abc" => 2056-34-12 + "abc" } + + cd! { gnu_iso : "2022-11-14" => 2022-11-14 + "" } + cd! { gnu_iso_zero_prefix : "022-011-014" => 0022-11-14 + "" } + cd! { gnu_iso_wo_century : "22-11-14" => 2022-11-14 + "" } + cd! { gnu_us : "11/14/2022" => 2022-11-14 + "" } + cd! { gnu_us_wo_century : "11/14/22" => 2022-11-14 + "" } + cd! { gnu_us_zeroes : "011/014/022" => 0022-11-14 + "" } + cd! { gnu_us_spaces : "11 / 14 / 2022" => 2022-11-14 + "" } + cd! { gnu_letter : "14 November 2022" => 2022-11-14 + "" } + cd! { gnu_letter_abbr : "14 Nov 2022" => 2022-11-14 + "" } + cd! { gnu_letter_us : "November 14, 2022" => 2022-11-14 + "" } + cd! { gnu_letter_us_wo_comma : "November 14 2022" => 2022-11-14 + "" } + cd! { gnu_lit_month_1 : "14-nov-2022" => 2022-11-14 + "" } + cd! { gnu_lit_month_spaces : "14 - nov - 2022" => 2022-11-14 + "" } + cd! { gnu_lit_month_2 : "14nov2022" => 2022-11-14 + "" } + + cd! { written_ordinals_1 : "first nov 2022" => X } + cd! { written_ordinals_2 : "eleven nov 2022" => X } + cd! { written_ordinals_3 : "nov eleven 2022" => X } + cd! { written_ordinals_4 : "2022-eleven-14" => X } + cd! { written_ordinals_5 : "2022-11-eleven" => X } + cd! { written_ordinals_6 : "22-11-eleven" => X } + + cd! { letter_1 : "14 november 2022" => 2022-11-14 + "" } + cd! { letter_2 : "14 nov 2022" => 2022-11-14 + "" } + cd! { letter_3 : "14nov2022" => 2022-11-14 + "" } + cd! { letter_4 : "14-nov-2022" => 2022-11-14 + "" } + cd! { letter_5 : "14 nov 22" => 2022-11-14 + "" } + + cd! { us_letter_1 : "november 14 2022" => 2022-11-14 + "" } + cd! { us_letter_2 : "nov 14 2022" => 2022-11-14 + "" } + cd! { us_letter_3 : "nov14 2022" => 2022-11-14 + "" } + cd! { us_letter_4 : "nov-14-2022" => 2022-11-14 + "" } + cd! { us_letter_5 : "nov 14 22" => 2022-11-14 + "" } + cd! { us_letter_6 : "november 14, 2022" => 2022-11-14 + "" } + cd! { us_letter_7 : "nov 14, 2022" => 2022-11-14 + "" } + cd! { us_letter_8 : "nov14, 2022" => 2022-11-14 + "" } + cd! { us_letter_9 : "nov 14, 2022" => 2022-11-14 + "" } + + macro_rules! md { + ($name:ident : $input:literal => xxxx-$month:literal-$day:literal + $tail:literal) => { + ptest! { $name : raw_month_day($input) => RawMonthDay { month: $month, day: $day }, $tail } + }; + ($name:ident : $input:literal => X) => { + ptest! { $name : raw_month_day($input) => X } + }; + } + + md! { written_month : "14 november" => xxxx-11-14 + "" } + md! { written_month_short : "14 nov" => xxxx-11-14 + "" } + md! { written_month_us : "nov 14" => xxxx-11-14 + "" } + md! { us_slash : "11/14" => xxxx-11-14 + "" } +} diff --git a/src/parse_items/items/seconds_epoch.rs b/src/parse_items/items/seconds_epoch.rs new file mode 100644 index 0000000..f1e4832 --- /dev/null +++ b/src/parse_items/items/seconds_epoch.rs @@ -0,0 +1,63 @@ +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use nom::combinator::map; +use nom::{ + bytes::complete::tag, + character::complete, + combinator::opt, + sequence::{preceded, tuple}, + Parser, +}; + +use crate::parse_items::items::Item; +use crate::parse_items::nano_seconds::nano_seconds; +use crate::parse_items::singleton_list; +use crate::parse_items::PResult; + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] +pub struct SecondsEpoch { + pub seconds: i64, + pub nanoseconds: u32, +} + +pub fn seconds_epoch(input: &str) -> PResult> { + singleton_list(map(raw_seconds_epoch, Item::SecondsEpoch)).parse(input) +} + +fn raw_seconds_epoch(input: &str) -> PResult { + let (tail, (seconds, nanoseconds)) = + preceded(tag("@"), tuple((complete::i64, opt(nano_seconds)))).parse(input)?; + + let nanoseconds = nanoseconds.unwrap_or(0); + Ok(( + tail, + SecondsEpoch { + seconds, + nanoseconds, + }, + )) +} + +#[cfg(test)] +mod tests { + use crate::parse_items::tests::ptest; + + use super::*; + + macro_rules! epoch { + ($name:ident : $input:literal => $seconds:literal:$nanoseconds:literal + $tail:literal) => { + ptest! { $name : raw_seconds_epoch($input) => SecondsEpoch { seconds: $seconds, nanoseconds: $nanoseconds } , $tail } + }; + ($name:ident : $input:literal => X) => { + ptest! { $name : raw_seconds_epoch($input) => X } + }; + } + + epoch! { positive : "@123abc" => 123:0 + "abc" } + epoch! { negative : "@-9876abc" => -9876:0 + "abc" } + epoch! { no_at : "-9876abc" => X } + epoch! { short_fraction : "@123.456" => 123:456000000 + "" } + epoch! { almost_a_second : "@0.999999999" => 0:999999999 + "" } + epoch! { silent_ignore_long_fraction : "@-123.98765432184723146abc" => -123:987654321 + "abc" } +} diff --git a/src/parse_items/items/time_of_day.rs b/src/parse_items/items/time_of_day.rs new file mode 100644 index 0000000..d660bd7 --- /dev/null +++ b/src/parse_items/items/time_of_day.rs @@ -0,0 +1,179 @@ +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use nom::branch::alt; +use nom::bytes::complete::{tag, tag_no_case, take}; +use nom::character::complete; +use nom::character::complete::digit1; +use nom::combinator::{map, map_parser, not, opt, peek, value, verify}; +use nom::sequence::preceded; +use nom::Parser; + +use crate::parse_items::items::Item; +use crate::parse_items::nano_seconds::nano_seconds; +use crate::parse_items::singleton_list; +use crate::parse_items::{fixed_number, PResult}; + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct RawTimeOfDay { + pub hours: u8, + pub minutes: u8, + pub seconds: u8, + pub nanoseconds: u32, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct TimeZoneCorrection { + pub hours: i8, + pub minutes: u8, +} + +pub fn time_of_day(input: &str) -> PResult> { + alt(( + time_of_day_24, + singleton_list(map(time_of_day_12, Item::TimeOfDay)), + )) + .parse(input) +} + +// 08:02:00.0000pm +// 08:02:00pm +// 08:02pm +// 8pm +fn time_of_day_12(input: &str) -> PResult { + let (tail, hours) = complete::u8.parse(input)?; + let (tail, minutes) = opt(preceded(tag(":"), fixed_number::u8(2))).parse(tail)?; + let (tail, seconds) = if minutes.is_some() { + opt(preceded(tag(":"), fixed_number::u8(2))).parse(tail)? + } else { + (tail, None) + }; + let (tail, nanoseconds) = if seconds.is_some() { + opt(nano_seconds).parse(tail)? + } else { + (tail, None) + }; + let (tail, meridiem) = alt(( + value(0, tag_no_case("am")), + value(0, tag_no_case("a.m.")), + value(12, tag_no_case("pm")), + value(12, tag_no_case("p.m.")), + )) + .parse(tail)?; + // 12 < 1... + let hours = (hours % 12) + meridiem; + + Ok(( + tail, + RawTimeOfDay { + hours, + minutes: minutes.unwrap_or_default(), + seconds: seconds.unwrap_or_default(), + nanoseconds: nanoseconds.unwrap_or_default(), + }, + )) +} + +fn time_zone_correction(input: &str) -> PResult { + let (tail, sign) = alt((value(1, tag("+")), value(-1, tag("-")))).parse(input)?; + let (tail, hours) = verify(fixed_number::u8(2), |&hour| hour <= 24).parse(tail)?; + let (tail, minutes) = opt(alt(( + fixed_number::u8(2), + preceded(tag(":"), fixed_number::u8(2)), + ))) + .parse(tail)?; + peek(not(map_parser(take(1u8), digit1))).parse(tail)?; + Ok(( + tail, + TimeZoneCorrection { + hours: sign * hours as i8, + minutes: minutes.unwrap_or_default(), + }, + )) +} + +// 20:02:00.0000-0500 +// 20:02:00.0000 +// 20:02:00-0500 +// 20:02:00 +// 20:02-0500 +// 20:02 +fn time_of_day_24(input: &str) -> PResult> { + let (tail, hours) = fixed_number::u8(2).parse(input)?; + let (tail, minutes) = preceded(tag(":"), fixed_number::u8(2)).parse(tail)?; + let (tail, seconds) = opt(preceded(tag(":"), fixed_number::u8(2))).parse(tail)?; + let (tail, nanoseconds) = if seconds.is_some() { + opt(nano_seconds).parse(tail)? + } else { + (tail, None) + }; + + let time_of_day = RawTimeOfDay { + hours, + minutes, + seconds: seconds.unwrap_or_default(), + nanoseconds: nanoseconds.unwrap_or_default(), + }; + + if let (tail, Some(tz_correction)) = opt(time_zone_correction).parse(tail)? { + Ok(( + tail, + vec![ + Item::TimeOfDay(time_of_day), + Item::TimeZoneCorrection(tz_correction), + ], + )) + } else { + Ok((tail, vec![Item::TimeOfDay(time_of_day)])) + } +} + +#[cfg(test)] +mod tests { + use nom::Parser; + + use crate::parse_items::tests::ptest; + + use super::*; + + macro_rules! tzc { + ($name:ident : $input:literal => $hours:literal:$minutes:literal + $tail:literal) => { + ptest! { $name : time_zone_correction($input) => TimeZoneCorrection { hours: $hours, minutes: $minutes }, $tail } + }; + ($name:ident : $input:literal => X) => { + ptest! { $name : time_zone_correction($input) => X } + }; + } + + tzc! { positive : "+12:34a" => 12:34 + "a" } + tzc! { negative : "-12:34a" => -12:34 + "a" } + tzc! { more_numbers : "+12:345" => X } + tzc! { without_colon : "+1234a" => 12:34 + "a" } + tzc! { without_minutes : "+12a" => 12:00 + "a" } + // tzc! { without_minutes_with_colon : "+12:a" => X } + // tzc! { single_digit_hour : "+1:23a" => X } + // tzc! { single_digit_minute : "+12:3a" => X } + // tzc! { negative_minutes : "+12:-34a" => X } + + macro_rules! t12 { + ($name:ident : $input:literal => $hours:literal:$minutes:literal:$seconds:literal:$nanoseconds:literal + $tail:literal) => { + ptest! { $name : time_of_day_12($input) => RawTimeOfDay { hours: $hours, minutes: $minutes, seconds: $seconds, nanoseconds: $nanoseconds }, $tail } + }; + ($name:ident : $input:literal => X) => { + ptest! { $name : time_of_day_12($input) => X } + }; + } + + t12! { twentythree : "11pm" => 23:00:00:0000 + "" } + t12! { midnight : "12am" => 00:00:00:0000 + "" } + t12! { one : "1am" => 01:00:00:0000 + "" } + t12! { morning : "8am" => 08:00:00:0000 + "" } + t12! { real_eleven : "11am" => 11:00:00:0000 + "" } + t12! { noon : "12pm" => 12:00:00:0000 + "" } + t12! { afternoon : "8pm" => 20:00:00:0000 + "" } + t12! { minutes : "8:10am" => 08:10:00:0000 + "" } + t12! { seconds : "8:10:20am" => 08:10:20:0000 + "" } + t12! { nanoseconds : "8:10:20.12345am" => 08:10:20:123450000 + "" } + + t12! { nanoseconds_wo_seconds : "8:10.12345am" => X } +} diff --git a/src/parse_items/items/tz_identifier.rs b/src/parse_items/items/tz_identifier.rs new file mode 100644 index 0000000..f7d1173 --- /dev/null +++ b/src/parse_items/items/tz_identifier.rs @@ -0,0 +1,65 @@ +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use nom::branch::alt; +use nom::bytes::complete::{escaped_transform, tag}; +use nom::character::complete::none_of; +use nom::combinator::{map, value}; +use nom::sequence::delimited; +use nom::Parser; + +use crate::parse_items::items::Item; +use crate::parse_items::singleton_list; +use crate::parse_items::PResult; + +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct TzIdentifier(pub String); + +pub fn tz_identifier(input: &str) -> PResult> { + singleton_list(map(raw_tz_identifier, Item::TimeZoneRule)).parse(input) +} + +fn raw_tz_identifier(input: &str) -> PResult { + let (tail, id) = delimited( + tag("TZ=\""), + escaped_transform( + none_of("\\\""), + '\\', + alt((value("\\", tag("\\")), value("\"", tag("\"")))), + ), + tag("\""), + ) + .parse(input)?; + Ok((tail, TzIdentifier(id))) +} + +#[cfg(test)] +mod tests { + use nom::Parser; + + use super::*; + + #[test] + fn amsterdam() { + assert_eq!( + raw_tz_identifier.parse(r#"TZ="Europe/Amsterdam" 14 november"#), + Ok((" 14 november", TzIdentifier("Europe/Amsterdam".into()))) + ); + } + + #[test] + fn new_york() { + assert_eq!( + raw_tz_identifier.parse(r#"TZ="Americas/New_York" nov14"#), + Ok((" nov14", TzIdentifier("Americas/New_York".into()))) + ); + } + + #[test] + fn escape() { + assert_eq!( + raw_tz_identifier.parse(r#"TZ="\"Escape\"\\this"nov14"#), + Ok(("nov14", TzIdentifier(r#""Escape"\this"#.into()))) + ); + } +} diff --git a/src/parse_items/nano_seconds.rs b/src/parse_items/nano_seconds.rs new file mode 100644 index 0000000..a4191d5 --- /dev/null +++ b/src/parse_items/nano_seconds.rs @@ -0,0 +1,47 @@ +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use nom::bytes::complete::take_while1; +use nom::character::complete; +use nom::character::complete::one_of; +use nom::combinator::all_consuming; +use nom::sequence::preceded; +use nom::Parser; + +use crate::parse_items::PResult; + +pub(crate) fn nano_seconds(input: &str) -> PResult { + let (tail, fraction) = + preceded(one_of(",."), take_while1(|c: char| c.is_ascii_digit())).parse(input)?; + + let digits_used = fraction.len().min(9); + let ns_per_frac = 10u32.pow(9 - digits_used as u32); + + let (_, fraction) = all_consuming(complete::u32).parse(&fraction[..digits_used])?; + let nanoseconds = fraction * ns_per_frac; + + Ok((tail, nanoseconds)) +} + +#[cfg(test)] +mod tests { + use crate::parse_items::tests::ptest; + + use super::*; + + macro_rules! ns { + ($name:ident : $input:literal => $nano_seconds:literal + $tail:literal) => { + ptest! { $name : nano_seconds($input) => $nano_seconds, $tail } + }; + ($name:ident : $input:literal => X) => { + ptest! { $name : nano_seconds($input) => X } + }; + } + + ns! { without_digits : "." => X } + ns! { one : ".1" => 100000000 + "" } + ns! { comma : ",123" => 123000000 + "" } + ns! { nine : ".123456789" => 123456789 + "" } + ns! { more : ".123456789101112" => 123456789 + "" } + ns! { negative : ".-123456789" => X } +} diff --git a/tests/simple.rs b/tests/simple.rs index a538f9d..5d649f5 100644 --- a/tests/simple.rs +++ b/tests/simple.rs @@ -1,3 +1,6 @@ +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + use chrono::{Duration, Utc}; use parse_datetime::{from_str, from_str_at_date, ParseDurationError};