diff --git a/CHANGELOG.md b/CHANGELOG.md index 2968c6ee..5191f9ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - **MusePack**: Fix potential panic when the beginning silence makes up the entire sample count ([PR](https://github.com/Serial-ATA/lofty-rs/pull/449)) -- **Timestamp**: Support timestamps without separators (ex. "20240906" vs "2024-09-06") ([issue](https://github.com/Serial-ATA/lofty-rs/issues/452)) ([PR](https://github.com/Serial-ATA/lofty-rs/issues/453)) +- **Timestamp**: + - Support timestamps without separators (ex. "20240906" vs "2024-09-06") ([issue](https://github.com/Serial-ATA/lofty-rs/issues/452)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/453)) + - `Timestamp::parse` will now short-circuit when possible in `ParsingMode::{BestAttempt, Relaxed}` ([issue](https://github.com/Serial-ATA/lofty-rs/issues/462)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/463)) + - For example, the timestamp "2024-06-03 14:08:49" contains a space instead of the required "T" marker. + In `ParsingMode::Strict`, this would be an error. Otherwise, the parser will just stop once it hits the space + and return the timestamp up to that point. - **ID3v2**: - `ItemKey::Director` will now be written correctly as a TXXX frame ([PR](https://github.com/Serial-ATA/lofty-rs/issues/454)) - When skipping invalid frames in `ParsingMode::{BestAttempt, Relaxed}`, the parser will no longer be able to go out of the bounds diff --git a/lofty/src/id3/v2/items/timestamp_frame.rs b/lofty/src/id3/v2/items/timestamp_frame.rs index d4997350..4ff15302 100644 --- a/lofty/src/id3/v2/items/timestamp_frame.rs +++ b/lofty/src/id3/v2/items/timestamp_frame.rs @@ -1,5 +1,5 @@ use crate::config::ParsingMode; -use crate::error::{ErrorKind, LoftyError, Result}; +use crate::error::Result; use crate::id3::v2::{FrameFlags, FrameHeader, FrameId}; use crate::macros::err; use crate::tag::items::Timestamp; @@ -77,14 +77,18 @@ impl<'a> TimestampFrame<'a> { return Ok(None); }; let Some(encoding) = TextEncoding::from_u8(encoding_byte) else { - return Err(LoftyError::new(ErrorKind::TextDecode( - "Found invalid encoding", - ))); + if parse_mode != ParsingMode::Relaxed { + err!(TextDecode("Found invalid encoding")) + } + return Ok(None); }; let value = decode_text(reader, TextDecodeOptions::new().encoding(encoding))?.content; if !value.is_ascii() { - err!(BadTimestamp("Timestamp contains non-ASCII characters")) + if parse_mode == ParsingMode::Strict { + err!(BadTimestamp("Timestamp contains non-ASCII characters")) + } + return Ok(None); } let header = FrameHeader::new(id, frame_flags); @@ -96,7 +100,18 @@ impl<'a> TimestampFrame<'a> { let reader = &mut value.as_bytes(); - let Some(timestamp) = Timestamp::parse(reader, parse_mode)? else { + let result; + match Timestamp::parse(reader, parse_mode) { + Ok(timestamp) => result = timestamp, + Err(e) => { + if parse_mode != ParsingMode::Relaxed { + return Err(e); + } + return Ok(None); + }, + } + + let Some(timestamp) = result else { // Timestamp is empty return Ok(None); }; @@ -105,7 +120,7 @@ impl<'a> TimestampFrame<'a> { Ok(Some(frame)) } - /// Convert an [`TimestampFrame`] to a byte vec + /// Convert a [`TimestampFrame`] to a byte vec /// /// # Errors /// diff --git a/lofty/src/tag/items/timestamp.rs b/lofty/src/tag/items/timestamp.rs index 411a05e8..d632bb5a 100644 --- a/lofty/src/tag/items/timestamp.rs +++ b/lofty/src/tag/items/timestamp.rs @@ -98,7 +98,7 @@ impl Timestamp { match $expr { Ok((_, 0)) => break, Ok((val, _)) => Some(val as u8), - Err(e) => return Err(e.into()), + Err(e) => return Err(e), } }; } @@ -126,7 +126,7 @@ impl Timestamp { let reader = &mut &content[..]; - // We need to very that the year is exactly 4 bytes long. This doesn't matter for other segments. + // We need to verify that the year is exactly 4 bytes long. This doesn't matter for other segments. let (year, bytes_read) = Self::segment::<4>(reader, None, parse_mode)?; if bytes_read != 4 { err!(BadTimestamp( @@ -173,22 +173,31 @@ impl Timestamp { sep: Option, parse_mode: ParsingMode, ) -> Result<(u16, usize)> { + const STOP_PARSING: (u16, usize) = (0, 0); + if content.is_empty() { - return Ok((0, 0)); + return Ok(STOP_PARSING); } if let Some(sep) = sep { let byte = content.read_u8()?; if byte != sep { - err!(BadTimestamp("Expected a separator")) + if parse_mode == ParsingMode::Strict { + err!(BadTimestamp("Expected a separator")) + } + return Ok(STOP_PARSING); } } if content.len() < SIZE { - err!(BadTimestamp("Timestamp segment is too short")) + if parse_mode == ParsingMode::Strict { + err!(BadTimestamp("Timestamp segment is too short")) + } + + return Ok(STOP_PARSING); } - let mut num = 0; + let mut num = None; let mut byte_count = 0; for i in content[..SIZE].iter().copied() { // Common spec violation: Timestamps may use spaces instead of zeros, so the month of June @@ -202,6 +211,8 @@ impl Timestamp { continue; } + // TODO: This is a spec violation for ID3v2, but not for ISO 8601 in general. Maybe consider + // making this a warning and allow it for all parsing modes? if !i.is_ascii_digit() { // Another spec violation, timestamps in the wild may not use a zero or a space, so // we would have to treat "06", "6", and " 6" as valid. @@ -220,13 +231,23 @@ impl Timestamp { )) } - num = num * 10 + u16::from(i - b'0'); + num = Some(num.unwrap_or(0) * 10 + u16::from(i - b'0')); byte_count += 1; } + let Some(parsed_num) = num else { + assert_ne!( + parse_mode, + ParsingMode::Strict, + "The timestamp segment is empty, the parser should've failed before this point." + ); + + return Ok(STOP_PARSING); + }; + *content = &content[byte_count..]; - Ok((num, byte_count)) + Ok((parsed_num, byte_count)) } pub(crate) fn verify(&self) -> Result<()> { @@ -316,21 +337,83 @@ mod tests { assert_eq!(timestamp.to_string().len(), 7); } + fn broken_timestamps() -> [(&'static [u8], Timestamp); 7] { + [ + ( + b"2024-", + Timestamp { + year: 2024, + ..Timestamp::default() + }, + ), + ( + b"2024-06-", + Timestamp { + year: 2024, + month: Some(6), + ..Timestamp::default() + }, + ), + ( + b"2024--", + Timestamp { + year: 2024, + ..Timestamp::default() + }, + ), + ( + b"2024- -", + Timestamp { + year: 2024, + ..Timestamp::default() + }, + ), + ( + b"2024-06-03T", + Timestamp { + year: 2024, + month: Some(6), + day: Some(3), + ..Timestamp::default() + }, + ), + ( + b"2024:06", + Timestamp { + year: 2024, + ..Timestamp::default() + }, + ), + ( + b"2024-0-", + Timestamp { + year: 2024, + month: Some(0), + ..Timestamp::default() + }, + ), + ] + } + #[test_log::test] - fn reject_broken_timestamps() { - let broken_timestamps: &[&[u8]] = &[ - b"2024-", - b"2024-06-", - b"2024--", - b"2024- -", - b"2024-06-03T", - b"2024:06", - b"2024-0-", - ]; + fn reject_broken_timestamps_strict() { + for (timestamp, _) in broken_timestamps() { + let parsed_timestamp = Timestamp::parse(&mut ×tamp[..], ParsingMode::Strict); + assert!(parsed_timestamp.is_err()); + } + } - for timestamp in broken_timestamps { + #[test_log::test] + fn accept_broken_timestamps_best_attempt() { + for (timestamp, partial_result) in broken_timestamps() { let parsed_timestamp = Timestamp::parse(&mut ×tamp[..], ParsingMode::BestAttempt); - assert!(parsed_timestamp.is_err()); + assert!(parsed_timestamp.is_ok()); + assert_eq!( + parsed_timestamp.unwrap(), + Some(partial_result), + "{}", + timestamp.escape_ascii() + ); } } @@ -466,4 +549,25 @@ mod tests { assert_eq!(parsed_timestamp, Some(expected)); } } + + #[test_log::test] + fn timestamp_no_time_marker() { + let timestamp = "2024-06-03 14:08:49"; + + let parsed_timestamp_strict = + Timestamp::parse(&mut timestamp.as_bytes(), ParsingMode::Strict); + assert!(parsed_timestamp_strict.is_err()); + + let parsed_timestamp_best_attempt = + Timestamp::parse(&mut timestamp.as_bytes(), ParsingMode::BestAttempt).unwrap(); + assert_eq!( + parsed_timestamp_best_attempt, + Some(Timestamp { + year: 2024, + month: Some(6), + day: Some(3), + ..Timestamp::default() + }) + ); + } }