Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Timestamp: Support parsing without separators #453

Merged
merged 1 commit into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed
- **MusePack**: Fix potential panic when the beginning silence makes up the entire sample count ([PR](https://github.com/Serial-ATA/lofty-rs/pull/449))
- **Timestamp**: Support timestamps without separators (ex. "20240906" vs "2024-09-06") ([issue](https://github.com/Serial-ATA/lofty-rs/issues/452)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/453))

## [0.21.1] - 2024-08-28

Expand Down
114 changes: 105 additions & 9 deletions lofty/src/tag/items/timestamp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ impl Timestamp {
/// The maximum length of a timestamp in bytes
pub const MAX_LENGTH: usize = 19;

/// Bytes recognized as segment separators inside a timestamp: `-`, `T`, and `:`.
const SEPARATORS: [u8; 3] = [b'-', b'T', b':'];

/// Read a [`Timestamp`]
///
/// NOTE: This will take [`Self::MAX_LENGTH`] bytes from the reader. Ensure that it only contains the timestamp
Expand All @@ -94,10 +96,8 @@ impl Timestamp {
macro_rules! read_segment {
($expr:expr) => {
match $expr {
Ok((_, 0)) => break,
Ok((val, _)) => Some(val as u8),
Err(LoftyError {
kind: ErrorKind::Io(io),
}) if matches!(io.kind(), std::io::ErrorKind::UnexpectedEof) => break,
Err(e) => return Err(e.into()),
}
};
Expand All @@ -118,6 +118,12 @@ impl Timestamp {
return Ok(None);
}

// It is valid for a timestamp to contain no separators, but this will lower our tolerance
// for common mistakes. We ignore the "T" separator here because it is **ALWAYS** required.
let timestamp_contains_separators = content
.iter()
.any(|&b| b != b'T' && Self::SEPARATORS.contains(&b));

let reader = &mut &content[..];

// We need to verify that the year is exactly 4 bytes long. This doesn't matter for other segments.
Expand All @@ -129,14 +135,33 @@ impl Timestamp {
}

timestamp.year = year;
if reader.is_empty() {
return Ok(Some(timestamp));
}

#[allow(clippy::never_loop)]
loop {
timestamp.month = read_segment!(Self::segment::<2>(reader, Some(b'-'), parse_mode));
timestamp.day = read_segment!(Self::segment::<2>(reader, Some(b'-'), parse_mode));
timestamp.month = read_segment!(Self::segment::<2>(
reader,
timestamp_contains_separators.then_some(b'-'),
parse_mode
));
timestamp.day = read_segment!(Self::segment::<2>(
reader,
timestamp_contains_separators.then_some(b'-'),
parse_mode
));
timestamp.hour = read_segment!(Self::segment::<2>(reader, Some(b'T'), parse_mode));
timestamp.minute = read_segment!(Self::segment::<2>(reader, Some(b':'), parse_mode));
timestamp.second = read_segment!(Self::segment::<2>(reader, Some(b':'), parse_mode));
timestamp.minute = read_segment!(Self::segment::<2>(
reader,
timestamp_contains_separators.then_some(b':'),
parse_mode
));
timestamp.second = read_segment!(Self::segment::<2>(
reader,
timestamp_contains_separators.then_some(b':'),
parse_mode
));
break;
}

Expand All @@ -148,7 +173,9 @@ impl Timestamp {
sep: Option<u8>,
parse_mode: ParsingMode,
) -> Result<(u16, usize)> {
const SEPARATORS: [u8; 3] = [b'-', b'T', b':'];
if content.is_empty() {
return Ok((0, 0));
}

if let Some(sep) = sep {
let byte = content.read_u8()?;
Expand Down Expand Up @@ -181,7 +208,10 @@ impl Timestamp {
//
// The easiest way to check for a missing digit is to see if we're just eating into
// the next segment's separator.
if sep.is_some() && SEPARATORS.contains(&i) && parse_mode != ParsingMode::Strict {
if sep.is_some()
&& Self::SEPARATORS.contains(&i)
&& parse_mode != ParsingMode::Strict
{
break;
}

Expand Down Expand Up @@ -370,4 +400,70 @@ mod tests {
let empty_timestamp_strict = Timestamp::parse(&mut "".as_bytes(), ParsingMode::Strict);
assert!(empty_timestamp_strict.is_err());
}

/// A full timestamp written without `-` or `:` separators (only the `T` divider)
/// must parse, in best-attempt mode, to the value returned by `expected()`.
#[test_log::test]
fn timestamp_no_separators() {
    let mut input = "20240603T140849".as_bytes();
    let parsed = Timestamp::parse(&mut input, ParsingMode::BestAttempt).unwrap();
    assert_eq!(parsed, Some(expected()));
}

/// Separator-less timestamps truncated after each segment (year, month, day,
/// hour, minute, second) must each parse successfully in strict mode, leaving
/// the segments that were not supplied at their `Timestamp::default()` values.
#[test_log::test]
fn timestamp_decode_partial_no_separators() {
    // Each case pairs the raw input bytes with the timestamp it must decode to.
    let cases: [(&[u8], Timestamp); 6] = [
        (
            b"2024",
            Timestamp {
                year: 2024,
                ..Timestamp::default()
            },
        ),
        (
            b"202406",
            Timestamp {
                year: 2024,
                month: Some(6),
                ..Timestamp::default()
            },
        ),
        (
            b"20240603",
            Timestamp {
                year: 2024,
                month: Some(6),
                day: Some(3),
                ..Timestamp::default()
            },
        ),
        (
            b"20240603T14",
            Timestamp {
                year: 2024,
                month: Some(6),
                day: Some(3),
                hour: Some(14),
                ..Timestamp::default()
            },
        ),
        (
            b"20240603T1408",
            Timestamp {
                year: 2024,
                month: Some(6),
                day: Some(3),
                hour: Some(14),
                minute: Some(8),
                ..Timestamp::default()
            },
        ),
        (b"20240603T140849", expected()),
    ];

    for (input, want) in cases {
        // On failure, include the offending input in the panic message.
        let parsed = match Timestamp::parse(&mut &input[..], ParsingMode::Strict) {
            Ok(parsed) => parsed,
            Err(e) => panic!("{e}: {}", std::str::from_utf8(input).unwrap()),
        };
        assert_eq!(parsed, Some(want));
    }
}
}