From a2d8a741f10e0935698e3cba0eccc83f93e04608 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Mon, 4 Nov 2024 11:33:13 +0000 Subject: [PATCH] return an error on invalid empty durations `+PT` and `-PT` (#75) * clean up some slice parsing * add another broken test case * clippy * more cleanups --- src/duration.rs | 170 ++++++++++++++++++++++-------------------------- tests/main.rs | 7 ++ 2 files changed, 84 insertions(+), 93 deletions(-) diff --git a/src/duration.rs b/src/duration.rs index b5adff8..d8d47a3 100644 --- a/src/duration.rs +++ b/src/duration.rs @@ -290,22 +290,21 @@ impl Duration { /// ``` #[inline] pub fn parse_bytes_with_config(bytes: &[u8], config: &TimeConfig) -> Result { - let (positive, offset) = match bytes.first().copied() { - Some(b'+') => (true, 1), - Some(b'-') => (false, 1), - None => return Err(ParseError::TooShort), - _ => (true, 0), + let (positive, bytes) = match bytes { + [b'-', bytes @ ..] => (false, bytes), + [b'+', bytes @ ..] | bytes => (true, bytes), }; - let mut d = match bytes.get(offset).copied() { - Some(b'P') => Self::parse_iso_duration(bytes, offset + 1), - _ => { + let mut d = match bytes { + [] => return Err(ParseError::TooShort), + [b'P', iso_duration @ ..] => Self::parse_iso_duration(iso_duration)?, + bytes => { if Self::is_duration_date_format(bytes) || bytes.len() < 5 { - Self::parse_days_time(bytes, offset) + Self::parse_days_time(bytes, config)? } else { - Self::parse_time(bytes, offset, config) + Self::parse_time(bytes, config)? } } - }?; + }; d.positive = positive; d.normalize()?; @@ -348,10 +347,11 @@ impl Duration { } } - fn parse_iso_duration(bytes: &[u8], offset: usize) -> Result { + /// Parse ISO duration (excluding the 'P' prefix) + fn parse_iso_duration(bytes: &[u8]) -> Result { let mut got_t = false; let mut last_had_fraction = false; - let mut position: usize = offset; + let mut position: usize = 0; let mut day: u32 = 0; let mut second: u32 = 0; let mut microsecond: u32 = 0; @@ -364,14 +364,14 @@ impl Duration { got_t = true; } Some(c) => { - let (value, op_fraction, new_pos) = Self::parse_number_frac(bytes, c, position)?; + let (value, op_fraction, offset) = Self::parse_number_frac(&bytes[position..], c)?; if last_had_fraction { return Err(ParseError::DurationInvalidFraction); } if op_fraction.is_some() { last_had_fraction = true; } - position = new_pos; + position += offset; if got_t { let mult: u32 = match bytes.get(position).copied() { Some(b'H') => 3600, @@ -411,7 +411,8 @@ impl Duration { } position += 1; } - if position < 3 { + // require at least one field + if position < 2 { return Err(ParseError::TooShort); } @@ -427,99 +428,83 @@ impl Duration { bytes.iter().any(|&byte| byte == b'd' || byte == b'D') } - fn parse_days_time(bytes: &[u8], offset: usize) -> Result { - let (day, offset) = match bytes.get(offset).copied() { - Some(c) => Self::parse_number(bytes, c, offset), + fn parse_days_time(bytes: &[u8], config: &TimeConfig) -> Result { + let (day, position) = match bytes.first().copied() { + Some(c) => Self::parse_number(bytes, c), _ => Err(ParseError::TooShort), }?; - let mut position = offset; - // consume a space, but allow for "d/D" - position += match bytes.get(position).copied() { - Some(b' ') => 1, - Some(b'd') | Some(b'D') => 0, - _ => return Err(ParseError::DurationInvalidDays), + let Some( + // expect d or D next, optionally prefixed by a space + [b' ', b'd' | b'D', remaining @ ..] | [b'd' | b'D', remaining @ ..], + ) = bytes.get(position..) + else { + return Err(ParseError::DurationInvalidDays); }; - // consume "d/D", nothing else is allowed - position += match bytes.get(position).copied() { - Some(b'd') | Some(b'D') => 1, - _ => return Err(ParseError::DurationInvalidDays), - }; - - macro_rules! days_only { - ($day:ident) => { - Ok(Self { - positive: false, // is set above - day: $day, - second: 0, - microsecond: 0, - }) - }; - } - // optionally consume the rest of the word "day/days" - position += match bytes.get(position).copied() { - Some(b'a') | Some(b'A') => { - match bytes.get(position + 1).copied() { - Some(b'y') | Some(b'Y') => (), - _ => return Err(ParseError::DurationInvalidDays), - }; - match bytes.get(position + 2).copied() { - Some(b's') | Some(b'S') => 3, - None => return days_only!(day), - _ => 2, - } + let remaining = match remaining { + // ays + [b'a', b'y', b's', remaining @ ..] + // ay + | [b'a', b'y', remaining @ ..] + // AYS + | [b'A', b'Y', b'S', remaining @ ..] + // AY + | [b'A', b'Y', remaining @ ..] + => { + remaining } - None => return days_only!(day), - _ => 0, + // word continued but not finished correctly + [b'a', ..] | [b'A', ..] => return Err(ParseError::DurationInvalidDays), + remaining => remaining, }; - // optionally consume a comma "," - position += match bytes.get(position).copied() { - Some(b',') => 1, - None => return days_only!(day), - _ => 0, + // days are finished, next prepare to parse the time + // optionally consume a comma "," and maybe a space + let remaining = match remaining { + // b", " + [b',', b' ', remaining @ ..] + // b"," + | [b',', remaining @ ..] + // b" " + | [b' ', remaining @ ..] + // no comma / space + | remaining => remaining, }; - // optionally consume a space " " - position += match bytes.get(position).copied() { - Some(b' ') => 1, - None => return days_only!(day), - _ => 0, - }; - - match bytes.get(position).copied() { - Some(_) => { - let t = Self::parse_time(bytes, position, &TimeConfigBuilder::new().build())?; - if t.day > 0 { - // 1d 24:00:00 is not allowed - return Err(ParseError::DurationHourValueTooLarge); - } + if remaining.is_empty() { + return Ok(Self { + positive: false, // is set above + day, + second: 0, + microsecond: 0, + }); + } - Ok(Self { - positive: false, // is set above - day, - second: t.second, - microsecond: t.microsecond, - }) - } - None => days_only!(day), + let t = Self::parse_time(remaining, config)?; + if t.day > 0 { + // 1d 24:00:00 is not allowed + return Err(ParseError::DurationHourValueTooLarge); } + + Ok(Self { + positive: false, // is set above + day, + second: t.second, + microsecond: t.microsecond, + }) } - fn parse_time(bytes: &[u8], offset: usize, config: &TimeConfig) -> Result { + fn parse_time(bytes: &[u8], config: &TimeConfig) -> Result { let byte_len = bytes.len(); - if byte_len - offset < 5 { + if byte_len < 5 { return Err(ParseError::TooShort); } const HOUR_NUMERIC_LIMIT: i64 = 24 * 10i64.pow(8); let mut hour: i64 = 0; - let mut chunks = bytes - .get(offset..) - .ok_or(ParseError::TooShort)? - .splitn(2, |&byte| byte == b':'); + let mut chunks = bytes.splitn(2, |&byte| byte == b':'); // can just use `.split_once()` in future maybe, if that stabilises let (hour_part, mut remaining) = match (chunks.next(), chunks.next(), chunks.next()) { @@ -570,12 +555,12 @@ impl Duration { }) } - fn parse_number(bytes: &[u8], d1: u8, offset: usize) -> Result<(u32, usize), ParseError> { + fn parse_number(bytes: &[u8], d1: u8) -> Result<(u32, usize), ParseError> { let mut value = match d1 { c if d1.is_ascii_digit() => (c - b'0') as u32, _ => return Err(ParseError::DurationInvalidNumber), }; - let mut position = offset + 1; + let mut position = 1; loop { match bytes.get(position) { Some(c) if c.is_ascii_digit() => { @@ -588,9 +573,8 @@ impl Duration { } } - fn parse_number_frac(bytes: &[u8], d1: u8, offset: usize) -> Result<(u32, Option, usize), ParseError> { - let (value, offset) = Self::parse_number(bytes, d1, offset)?; - let mut position = offset; + fn parse_number_frac(bytes: &[u8], d1: u8) -> Result<(u32, Option, usize), ParseError> { + let (value, mut position) = Self::parse_number(bytes, d1)?; let next_char = bytes.get(position).copied(); if next_char == Some(b'.') || next_char == Some(b',') { let mut decimal = 0_f64; diff --git a/tests/main.rs b/tests/main.rs index 259ec3d..fb42b37 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -1171,6 +1171,7 @@ param_tests! { duration_too_short1: err => "", TooShort; duration_too_short2: err => "+", TooShort; duration_too_short3: err => "P", TooShort; + duration_too_short4: err => "+PT", TooShort; duration_1y: ok => "P1Y", "P1Y"; duration_123y: ok => "P123Y", "P123Y"; duration_123_8y: ok => "P123.8Y", "P123Y292D"; @@ -1185,6 +1186,12 @@ param_tests! { duration_fraction2: ok => "P1Y1DT2H0.5S", "P1Y1DT2H0.5S"; duration_1: ok => "P1DT1S", "P1DT1S"; duration_all: ok => "P1Y2M3DT4H5M6S", "P1Y63DT4H5M6S"; + // FIXME: this is current behaviour, but we should probably error on + // out-of order durations (not RFC3339 compliant) + duration_all_wrong_order: ok => "P3D2M1YT6S5M4H", "P1Y63DT4H5M6S"; + // FIXME: this is current behaviour, but we should probably error on + // repeated units (not RFC3339 compliant) + duration_unit_repeated: ok => "P1Y2Y", "P3Y"; duration: err => "PD", DurationInvalidNumber; duration: err => "P1DT1MT1S", DurationTRepeated; duration: err => "P1DT1.1M1S", DurationInvalidFraction;