Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parsing preparations #1479

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions src/datetime/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1788,6 +1788,28 @@ where
}
}

/// Parses a `DateTime<FixedOffset>` from a relaxed form of RFC3339.
///
/// Either a space or a 'T' is accepted as the separator between the date and
/// time parts. Additional spaces are allowed between each component.
///
/// All of these examples are equivalent:
/// ```
/// # use chrono::{DateTime, offset::FixedOffset};
/// "2012-12-12T12:12:12Z".parse::<DateTime<FixedOffset>>()?;
/// "2012-12-12 12:12:12Z".parse::<DateTime<FixedOffset>>()?;
/// "2012- 12-12T12: 12:12Z".parse::<DateTime<FixedOffset>>()?;
/// # Ok::<(), chrono::ParseError>(())
/// ```
impl str::FromStr for DateTime<FixedOffset> {
    type Err = ParseError;

    fn from_str(s: &str) -> ParseResult<DateTime<FixedOffset>> {
        // Format description: the RFC3339 item followed by an empty `Space` item.
        let items = [Item::Fixed(Fixed::RFC3339), Item::Space("")];
        let mut fields = Parsed::new();
        parse(&mut fields, s, items.iter())?;
        fields.to_datetime()
    }
}

/// Accepts a relaxed form of RFC3339.
/// A space or a 'T' are accepted as the separator between the date and time
/// parts.
Expand Down
48 changes: 7 additions & 41 deletions src/format/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use super::scan;
use super::{Fixed, InternalFixed, InternalInternal, Item, Numeric, Pad, Parsed};
use super::{ParseError, ParseResult};
use super::{BAD_FORMAT, INVALID, OUT_OF_RANGE, TOO_LONG, TOO_SHORT};
use crate::{DateTime, FixedOffset, Weekday};
use crate::Weekday;

fn set_weekday_with_num_days_from_sunday(p: &mut Parsed, v: i64) -> ParseResult<()> {
p.set_weekday(match v {
Expand Down Expand Up @@ -357,7 +357,7 @@ where
Minute => (2, false, Parsed::set_minute),
Second => (2, false, Parsed::set_second),
Nanosecond => (9, false, Parsed::set_nanosecond),
Timestamp => (usize::MAX, false, Parsed::set_timestamp),
Timestamp => (usize::MAX, true, Parsed::set_timestamp),

// for the future expansion
Internal(ref int) => match int._dummy {},
Expand All @@ -366,8 +366,7 @@ where
s = s.trim_start();
let v = if signed {
if s.starts_with('-') {
let v = try_consume!(scan::number(&s[1..], 1, usize::MAX));
0i64.checked_sub(v).ok_or(OUT_OF_RANGE)?
try_consume!(scan::negative_number(&s[1..], 1, usize::MAX))
} else if s.starts_with('+') {
try_consume!(scan::number(&s[1..], 1, usize::MAX))
} else {
Expand Down Expand Up @@ -425,25 +424,16 @@ where
}

&Internal(InternalFixed { val: InternalInternal::Nanosecond3NoDot }) => {
if s.len() < 3 {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain in the commit message why they're duplicate (that is, how these invariants are upheld)? It's not clear in the diff.

return Err(TOO_SHORT);
}
let nano = try_consume!(scan::nanosecond_fixed(s, 3));
parsed.set_nanosecond(nano)?;
}

&Internal(InternalFixed { val: InternalInternal::Nanosecond6NoDot }) => {
if s.len() < 6 {
return Err(TOO_SHORT);
}
let nano = try_consume!(scan::nanosecond_fixed(s, 6));
parsed.set_nanosecond(nano)?;
}

&Internal(InternalFixed { val: InternalInternal::Nanosecond9NoDot }) => {
if s.len() < 9 {
return Err(TOO_SHORT);
}
let nano = try_consume!(scan::nanosecond_fixed(s, 9));
parsed.set_nanosecond(nano)?;
}
Expand Down Expand Up @@ -508,31 +498,6 @@ where
Ok(s)
}

/// Parses a `DateTime<FixedOffset>` from a relaxed form of RFC3339.
///
/// Either a space or a 'T' is accepted as the separator between the date and
/// time parts. Additional spaces are allowed between each component.
///
/// All of these examples are equivalent:
/// ```
/// # use chrono::{DateTime, offset::FixedOffset};
/// "2012-12-12T12:12:12Z".parse::<DateTime<FixedOffset>>()?;
/// "2012-12-12 12:12:12Z".parse::<DateTime<FixedOffset>>()?;
/// "2012- 12-12T12: 12:12Z".parse::<DateTime<FixedOffset>>()?;
/// # Ok::<(), chrono::ParseError>(())
/// ```
impl str::FromStr for DateTime<FixedOffset> {
    type Err = ParseError;

    fn from_str(s: &str) -> ParseResult<DateTime<FixedOffset>> {
        let mut fields = Parsed::new();
        let (rest, _) = parse_rfc3339_relaxed(&mut fields, s)?;
        // Only whitespace may follow the timestamp; anything else is trailing input.
        if rest.trim_start().is_empty() {
            fields.to_datetime()
        } else {
            Err(TOO_LONG)
        }
    }
}

/// Accepts a relaxed form of RFC3339.
///
/// Differences with RFC3339:
Expand Down Expand Up @@ -790,6 +755,7 @@ mod tests {
check(" + 42", &[Space(" "), num(Year)], Err(INVALID));
check("-", &[num(Year)], Err(TOO_SHORT));
check("+", &[num(Year)], Err(TOO_SHORT));
check("-9223372036854775808", &[num(Timestamp)], parsed!(timestamp: i64::MIN));

// unsigned numeric
check("345", &[num(Ordinal)], parsed!(ordinal: 345));
Expand Down Expand Up @@ -1317,7 +1283,7 @@ mod tests {
check("12345678", &[internal_fixed(TimezoneOffsetPermissive)], Err(INVALID));
check("+1", &[internal_fixed(TimezoneOffsetPermissive)], Err(TOO_SHORT));
check("+12", &[internal_fixed(TimezoneOffsetPermissive)], parsed!(offset: 43_200));
check("+123", &[internal_fixed(TimezoneOffsetPermissive)], Err(TOO_SHORT));
check("+123", &[internal_fixed(TimezoneOffsetPermissive)], Err(TOO_LONG));
check("+1234", &[internal_fixed(TimezoneOffsetPermissive)], parsed!(offset: 45_240));
check("-1234", &[internal_fixed(TimezoneOffsetPermissive)], parsed!(offset: -45_240));
check("−1234", &[internal_fixed(TimezoneOffsetPermissive)], parsed!(offset: -45_240)); // MINUS SIGN (U+2212)
Expand All @@ -1334,7 +1300,7 @@ mod tests {
check("12:34:56", &[internal_fixed(TimezoneOffsetPermissive)], Err(INVALID));
check("+1:", &[internal_fixed(TimezoneOffsetPermissive)], Err(INVALID));
check("+12:", &[internal_fixed(TimezoneOffsetPermissive)], parsed!(offset: 43_200));
check("+12:3", &[internal_fixed(TimezoneOffsetPermissive)], Err(TOO_SHORT));
check("+12:3", &[internal_fixed(TimezoneOffsetPermissive)], Err(TOO_LONG));
check("+12:34", &[internal_fixed(TimezoneOffsetPermissive)], parsed!(offset: 45_240));
check("-12:34", &[internal_fixed(TimezoneOffsetPermissive)], parsed!(offset: -45_240));
check("−12:34", &[internal_fixed(TimezoneOffsetPermissive)], parsed!(offset: -45_240)); // MINUS SIGN (U+2212)
Expand Down Expand Up @@ -1384,7 +1350,7 @@ mod tests {
);
check("🤠+12:34", &[internal_fixed(TimezoneOffsetPermissive)], Err(INVALID));
check("+12:34🤠", &[internal_fixed(TimezoneOffsetPermissive)], Err(TOO_LONG));
check("+12:🤠34", &[internal_fixed(TimezoneOffsetPermissive)], Err(INVALID));
check("+12:🤠34", &[internal_fixed(TimezoneOffsetPermissive)], Err(TOO_LONG));
check(
"+12:34🤠",
&[internal_fixed(TimezoneOffsetPermissive), Literal("🤠")],
Expand Down
76 changes: 41 additions & 35 deletions src/format/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,28 @@
/// Any number that does not fit in `i64` is an error.
#[inline]
pub(super) fn number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64)> {
    // Scan the digits as an unsigned value first, then narrow to `i64`;
    // a magnitude that does not fit is reported as `OUT_OF_RANGE`.
    let (rest, magnitude) = unsigned_number(s, min, max)?;
    let value: i64 = magnitude.try_into().map_err(|_| OUT_OF_RANGE)?;
    Ok((rest, value))
}

/// Tries to parse the negative number from `min` to `max` digits.
///
/// The absence of digits at all is an unconditional error.
/// More than `max` digits are consumed up to the first `max` digits.
/// Any number that does not fit in `i64` is an error.
#[inline]
pub(super) fn negative_number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64)> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer to inline this in the caller (more like before), since there aren't any other users, anyway.

Is there a way to more directly express the precondition for negating the number? This feels kind of roundabout -- even the previous 0i64.checked_sub(v).ok_or() feels clearer.

let (s, n) = unsigned_number(s, min, max)?;
let signed_neg = (n as i64).wrapping_neg();
if !signed_neg.is_negative() {
return Err(OUT_OF_RANGE);

Check warning on line 32 in src/format/scan.rs

View check run for this annotation

Codecov / codecov/patch

src/format/scan.rs#L32

Added line #L32 was not covered by tests
}
Ok((s, signed_neg))
}

/// Tries to parse a number from `min` to `max` digits as an unsigned integer.
#[inline]
pub(super) fn unsigned_number(s: &str, min: usize, max: usize) -> ParseResult<(&str, u64)> {
assert!(min <= max);

// We are only interested in ascii numbers, so we can work with the `str` as bytes. We stop on
Expand All @@ -25,7 +47,7 @@
return Err(TOO_SHORT);
}

let mut n = 0i64;
let mut n = 0u64;
for (i, c) in bytes.iter().take(max).cloned().enumerate() {
// cloned() = copied()
if !c.is_ascii_digit() {
Expand All @@ -36,7 +58,7 @@
}
}

n = match n.checked_mul(10).and_then(|n| n.checked_add((c - b'0') as i64)) {
n = match n.checked_mul(10).and_then(|n| n.checked_add((c - b'0') as u64)) {
Some(n) => n,
None => return Err(OUT_OF_RANGE),
};
Expand Down Expand Up @@ -181,7 +203,7 @@
}

/// Consumes any number (including zero) of colon or spaces.
pub(crate) fn colon_or_space(s: &str) -> ParseResult<&str> {
pub(super) fn colon_or_space(s: &str) -> ParseResult<&str> {
Ok(s.trim_start_matches(|c: char| c == ':' || c.is_whitespace()))
}

Expand All @@ -199,7 +221,7 @@
/// This is part of [RFC 3339 & ISO 8601].
///
/// [RFC 3339 & ISO 8601]: https://en.wikipedia.org/w/index.php?title=ISO_8601&oldid=1114309368#Time_offsets_from_UTC
pub(crate) fn timezone_offset<F>(
pub(super) fn timezone_offset<F>(
mut s: &str,
mut consume_colon: F,
allow_zulu: bool,
Expand All @@ -215,67 +237,51 @@
}
}

const fn digits(s: &str) -> ParseResult<(u8, u8)> {
fn digits(s: &str) -> ParseResult<u8> {
let b = s.as_bytes();
if b.len() < 2 {
Err(TOO_SHORT)
} else {
Ok((b[0], b[1]))
return Err(TOO_SHORT);
}
match (b[0], b[1]) {
(h1 @ b'0'..=b'9', h2 @ b'0'..=b'9') => Ok((h1 - b'0') * 10 + (h2 - b'0')),
_ => Err(INVALID),
}
}
let negative = match s.chars().next() {
Some('+') => {
// PLUS SIGN (U+2B)
s = &s['+'.len_utf8()..];

false
}
Some('-') => {
// HYPHEN-MINUS (U+2D)
s = &s['-'.len_utf8()..];

true
}
Some('−') => {
Some('−') if allow_tz_minus_sign => {
// MINUS SIGN (U+2212)
if !allow_tz_minus_sign {
return Err(INVALID);
}
s = &s['−'.len_utf8()..];

true
}
Some(_) => return Err(INVALID),
None => return Err(TOO_SHORT),
};

// hours (00--99)
let hours = match digits(s)? {
(h1 @ b'0'..=b'9', h2 @ b'0'..=b'9') => i32::from((h1 - b'0') * 10 + (h2 - b'0')),
_ => return Err(INVALID),
};
let hours = digits(s)? as i32;
s = &s[2..];

// colons (and possibly other separators)
s = consume_colon(s)?;

// minutes (00--59)
// if the next two items are digits then we have to add minutes
let minutes = if let Ok(ds) = digits(s) {
match ds {
(m1 @ b'0'..=b'5', m2 @ b'0'..=b'9') => i32::from((m1 - b'0') * 10 + (m2 - b'0')),
(b'6'..=b'9', b'0'..=b'9') => return Err(OUT_OF_RANGE),
_ => return Err(INVALID),
let minutes = match digits(s) {
Ok(m) if m >= 60 => return Err(OUT_OF_RANGE),
Ok(m) => {
s = &s[2..];
m as i32
}
} else if allow_missing_minutes {
0
} else {
return Err(TOO_SHORT);
};
s = match s.len() {
len if len >= 2 => &s[2..],
0 => s,
_ => return Err(TOO_SHORT),
Err(_) if allow_missing_minutes => 0,
Err(e) => return Err(e),
};

let seconds = hours * 3600 + minutes * 60;
Expand Down
8 changes: 5 additions & 3 deletions src/offset/fixed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use core::str::FromStr;
use rkyv::{Archive, Deserialize, Serialize};

use super::{MappedLocalTime, Offset, TimeZone};
use crate::format::{scan, ParseError, OUT_OF_RANGE};
use crate::format::{parse, Fixed, Item, ParseError, Parsed};
use crate::naive::{NaiveDate, NaiveDateTime};

/// The time zone with fixed offset, from UTC-23:59:59 to UTC+23:59:59.
Expand Down Expand Up @@ -118,9 +118,11 @@ impl FixedOffset {
/// Parsing a `str` into a `FixedOffset` uses the format [`%z`](crate::format::strftime).
impl FromStr for FixedOffset {
type Err = ParseError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
let (_, offset) = scan::timezone_offset(s, scan::colon_or_space, false, false, true)?;
Self::east_opt(offset).ok_or(OUT_OF_RANGE)
let mut parsed = Parsed::new();
parse(&mut parsed, s, [Item::Fixed(Fixed::TimezoneOffset)].iter())?;
parsed.to_fixed_offset()
}
}

Expand Down
Loading