diff --git a/README.md b/README.md index 2177d0a..89d9d43 100644 --- a/README.md +++ b/README.md @@ -42,58 +42,53 @@ This will be the datetime parsing logic for [pydantic-core](https://github.com/p ```rust use speedate::{DateTime, Date, Time}; -fn main() { - let dt = DateTime::parse_str("2022-01-01T12:13:14Z").unwrap(); - assert_eq!( - dt, - DateTime { - date: Date { - year: 2022, - month: 1, - day: 1, - }, - time: Time { - hour: 12, - minute: 13, - second: 14, - microsecond: 0, - tz_offset: Some(0), - }, - } - ); - assert_eq!(dt.to_string(), "2022-01-01T12:13:14Z"); -} +let dt = DateTime::parse_str("2022-01-01T12:13:14Z").unwrap(); +assert_eq!( + dt, + DateTime { + date: Date { + year: 2022, + month: 1, + day: 1, + }, + time: Time { + hour: 12, + minute: 13, + second: 14, + microsecond: 0, + tz_offset: Some(0), + }, + } +); +assert_eq!(dt.to_string(), "2022-01-01T12:13:14Z"); ``` -To control the specifics of time parsing you can use provide a `TimeConfig`: +To control the specifics of time parsing you can provide a `TimeConfig`: ```rust use speedate::{DateTime, Date, Time, TimeConfig}; - -fn main() { - let dt = DateTime::parse_bytes_with_config( - "1689102037.5586429".as_bytes(), - &TimeConfig::builder().unix_timestamp_offset(Some(0)).build(), - ).unwrap(); - assert_eq!( - dt, - DateTime { - date: Date { - year: 2023, - month: 7, - day: 11, - }, - time: Time { - hour: 19, - minute: 0, - second: 37, - microsecond: 558643, - tz_offset: Some(0), - }, - } - ); - assert_eq!(dt.to_string(), "2023-07-11T19:00:37.558643Z"); -} +let dt = DateTime::parse_bytes_with_config( + "1689102037.5586429".as_bytes(), + &TimeConfig::builder().unix_timestamp_offset(Some(0)).build(), +).unwrap(); +assert_eq!( + dt, + DateTime { + date: Date { + year: 2023, + month: 7, + day: 11, + }, + time: Time { + hour: 19, + minute: 0, + second: 37, + microsecond: 558643, + tz_offset: Some(0), + }, + } +); +assert_eq!(dt.to_string(), "2023-07-11T19:00:37.558643Z"); ``` ## Performance diff --git a/src/duration.rs b/src/duration.rs index 8fde0dc..c45d776 100644 --- 
a/src/duration.rs +++ b/src/duration.rs @@ -298,10 +298,13 @@ impl Duration { }; let mut d = match bytes.get(offset).copied() { Some(b'P') => Self::parse_iso_duration(bytes, offset + 1), - _ => match bytes.get(offset + 2).copied() { - Some(b':') => Self::parse_time(bytes, offset, config), - _ => Self::parse_days_time(bytes, offset), - }, + _ => { + if Self::is_duration_date_format(bytes) || bytes.len() < 5 { + Self::parse_days_time(bytes, offset) + } else { + Self::parse_time(bytes, offset, config) + } + } }?; d.positive = positive; @@ -420,6 +423,10 @@ impl Duration { }) } + fn is_duration_date_format(bytes: &[u8]) -> bool { + bytes.iter().any(|&byte| byte == b'd' || byte == b'D') + } + fn parse_days_time(bytes: &[u8], offset: usize) -> Result { let (day, offset) = match bytes.get(offset).copied() { Some(c) => Self::parse_number(bytes, c, offset), @@ -498,16 +505,63 @@ impl Duration { } fn parse_time(bytes: &[u8], offset: usize, config: &TimeConfig) -> Result { - let t = crate::time::PureTime::parse(bytes, offset, config)?; + let byte_len = bytes.len(); + if byte_len - offset < 5 { + return Err(ParseError::TooShort); + } + const HOUR_NUMERIC_LIMIT: i64 = 24 * 10i64.pow(8); + let mut hour: i64 = 0; + + let mut chunks = bytes + .get(offset..) + .ok_or(ParseError::TooShort)? 
+ .splitn(2, |&byte| byte == b':'); + + // can just use `.split_once()` in future maybe, if that stabilises + let (hour_part, mut remaining) = match (chunks.next(), chunks.next(), chunks.next()) { + (_, _, Some(_)) | (None, _, _) => unreachable!("should always be 1 or 2 chunks"), + (Some(_hour_part), None, _) => return Err(ParseError::InvalidCharHour), + (Some(hour_part), Some(remaining), None) => (hour_part, remaining), + }; + + // more than 10 digits means > 9,999,999,999: reject before accumulating into `hour` + if hour_part.len() > 10 { + return Err(ParseError::DurationHourValueTooLarge); + } - if bytes.len() > t.position { + for byte in hour_part { + let h = byte.wrapping_sub(b'0'); // wraps (no panic) for bytes below b'0' + if h > 9 { + return Err(ParseError::InvalidCharHour); + } + hour = (hour * 10) + (h as i64); + } + if hour > HOUR_NUMERIC_LIMIT { + return Err(ParseError::DurationHourValueTooLarge); + } + + let mut new_bytes = *b"00:00:00.000000"; + if 3 + remaining.len() > new_bytes.len() { + match config.microseconds_precision_overflow_behavior { + crate::MicrosecondsPrecisionOverflowBehavior::Truncate => remaining = &remaining[..new_bytes.len() - 3], + crate::MicrosecondsPrecisionOverflowBehavior::Error => return Err(ParseError::SecondFractionTooLong), + } + } + let new_bytes = &mut new_bytes[..3 + remaining.len()]; + new_bytes[3..].copy_from_slice(remaining); + + let t = crate::time::PureTime::parse(new_bytes, 0, config)?; + + if new_bytes.len() > t.position { return Err(ParseError::ExtraCharacters); } + let day = hour as u32 / 24; // hour <= HOUR_NUMERIC_LIMIT, so the cast is lossless + hour %= 24; Ok(Self { positive: false, // is set above - day: 0, - second: t.total_seconds(), + day, + second: t.total_seconds() + (hour as u32) * 3_600, microsecond: t.microsecond, }) } diff --git a/src/lib.rs b/src/lib.rs index 5017613..dcbc06a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -133,6 +133,8 @@ pub enum ParseError { /// a numeric value in the duration is too large DurationValueTooLarge, + /// duration hours may not exceed 2,400,000,000 + DurationHourValueTooLarge, /// durations may not exceed 999,999,999 days 
DurationDaysTooLarge, /// dates before 1600 are not supported as unix timestamps DateTooSmall, diff --git a/tests/main.rs b/tests/main.rs index 7f43263..f9ecc6c 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -1153,7 +1153,12 @@ param_tests! { duration_time_fraction: ok => "00:01:03.123", "PT1M3.123S"; duration_time_extra: err => "00:01:03.123x", ExtraCharacters; duration_time_timezone: err => "00:01:03x", ExtraCharacters; - duration_time_invalid_hour: err => "24:01:03", OutOfRangeHour; + duration_time_more_than_24_hour: ok => "24:01:03", "P1DT1M3S"; + duration_time_way_more_than_24_hour: ok => "2400000000:01:03", "P273972Y220DT1M3S"; + duration_time_way_more_than_24_hour_long_fraction: ok => "2400000000:01:03.654321", "P273972Y220DT1M3.654321S"; + duration_time_invalid_over_limit_hour: err => "100000000000:01:03", DurationHourValueTooLarge; + duration_time_overflow_hour: err => "100000000000000000000000:01:03", DurationHourValueTooLarge; + duration_time_invalid_format_hour: err => "1000xxx000:01:03", InvalidCharHour; duration_time_invalid_minute: err => "00:60:03", OutOfRangeMinute; duration_time_invalid_second: err => "00:00:60", OutOfRangeSecond; duration_time_fraction_too_long: err => "00:00:00.1234567", SecondFractionTooLong;