Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Timestamp: Allow short-circuiting on partially invalid inputs #463

Merged
merged 1 commit into from
Sep 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed
- **MusePack**: Fix potential panic when the beginning silence makes up the entire sample count ([PR](https://github.com/Serial-ATA/lofty-rs/pull/449))
- **Timestamp**: Support timestamps without separators (ex. "20240906" vs "2024-09-06") ([issue](https://github.com/Serial-ATA/lofty-rs/issues/452)) ([PR](https://github.com/Serial-ATA/lofty-rs/issues/453))
- **Timestamp**:
- Support timestamps without separators (ex. "20240906" vs "2024-09-06") ([issue](https://github.com/Serial-ATA/lofty-rs/issues/452)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/453))
- `Timestamp::parse` will now short-circuit when possible in `ParsingMode::{BestAttempt, Relaxed}` ([issue](https://github.com/Serial-ATA/lofty-rs/issues/462)) ([PR](https://github.com/Serial-ATA/lofty-rs/pull/463))
- For example, the timestamp "2024-06-03 14:08:49" contains a space instead of the required "T" marker.
In `ParsingMode::Strict`, this would be an error. Otherwise, the parser will just stop once it hits the space
and return the timestamp up to that point.
- **ID3v2**:
- `ItemKey::Director` will now be written correctly as a TXXX frame ([PR](https://github.com/Serial-ATA/lofty-rs/issues/454))
- When skipping invalid frames in `ParsingMode::{BestAttempt, Relaxed}`, the parser will no longer be able to go out of the bounds
Expand Down
29 changes: 22 additions & 7 deletions lofty/src/id3/v2/items/timestamp_frame.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::config::ParsingMode;
use crate::error::{ErrorKind, LoftyError, Result};
use crate::error::Result;
use crate::id3::v2::{FrameFlags, FrameHeader, FrameId};
use crate::macros::err;
use crate::tag::items::Timestamp;
Expand Down Expand Up @@ -77,14 +77,18 @@ impl<'a> TimestampFrame<'a> {
return Ok(None);
};
let Some(encoding) = TextEncoding::from_u8(encoding_byte) else {
return Err(LoftyError::new(ErrorKind::TextDecode(
"Found invalid encoding",
)));
if parse_mode != ParsingMode::Relaxed {
err!(TextDecode("Found invalid encoding"))
}
return Ok(None);
};

let value = decode_text(reader, TextDecodeOptions::new().encoding(encoding))?.content;
if !value.is_ascii() {
err!(BadTimestamp("Timestamp contains non-ASCII characters"))
if parse_mode == ParsingMode::Strict {
err!(BadTimestamp("Timestamp contains non-ASCII characters"))
}
return Ok(None);
}

let header = FrameHeader::new(id, frame_flags);
Expand All @@ -96,7 +100,18 @@ impl<'a> TimestampFrame<'a> {

let reader = &mut value.as_bytes();

let Some(timestamp) = Timestamp::parse(reader, parse_mode)? else {
let result;
match Timestamp::parse(reader, parse_mode) {
Ok(timestamp) => result = timestamp,
Err(e) => {
if parse_mode != ParsingMode::Relaxed {
return Err(e);
}
return Ok(None);
},
}

let Some(timestamp) = result else {
// Timestamp is empty
return Ok(None);
};
Expand All @@ -105,7 +120,7 @@ impl<'a> TimestampFrame<'a> {
Ok(Some(frame))
}

/// Convert an [`TimestampFrame`] to a byte vec
/// Convert a [`TimestampFrame`] to a byte vec
///
/// # Errors
///
Expand Down
144 changes: 124 additions & 20 deletions lofty/src/tag/items/timestamp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ impl Timestamp {
match $expr {
Ok((_, 0)) => break,
Ok((val, _)) => Some(val as u8),
Err(e) => return Err(e.into()),
Err(e) => return Err(e),
}
};
}
Expand Down Expand Up @@ -126,7 +126,7 @@ impl Timestamp {

let reader = &mut &content[..];

// We need to very that the year is exactly 4 bytes long. This doesn't matter for other segments.
// We need to verify that the year is exactly 4 bytes long. This doesn't matter for other segments.
let (year, bytes_read) = Self::segment::<4>(reader, None, parse_mode)?;
if bytes_read != 4 {
err!(BadTimestamp(
Expand Down Expand Up @@ -173,22 +173,31 @@ impl Timestamp {
sep: Option<u8>,
parse_mode: ParsingMode,
) -> Result<(u16, usize)> {
const STOP_PARSING: (u16, usize) = (0, 0);

if content.is_empty() {
return Ok((0, 0));
return Ok(STOP_PARSING);
}

if let Some(sep) = sep {
let byte = content.read_u8()?;
if byte != sep {
err!(BadTimestamp("Expected a separator"))
if parse_mode == ParsingMode::Strict {
err!(BadTimestamp("Expected a separator"))
}
return Ok(STOP_PARSING);
}
}

if content.len() < SIZE {
err!(BadTimestamp("Timestamp segment is too short"))
if parse_mode == ParsingMode::Strict {
err!(BadTimestamp("Timestamp segment is too short"))
}

return Ok(STOP_PARSING);
}

let mut num = 0;
let mut num = None;
let mut byte_count = 0;
for i in content[..SIZE].iter().copied() {
// Common spec violation: Timestamps may use spaces instead of zeros, so the month of June
Expand All @@ -202,6 +211,8 @@ impl Timestamp {
continue;
}

// TODO: This is a spec violation for ID3v2, but not for ISO 8601 in general. Maybe consider
// making this a warning and allow it for all parsing modes?
if !i.is_ascii_digit() {
// Another spec violation, timestamps in the wild may not use a zero or a space, so
// we would have to treat "06", "6", and " 6" as valid.
Expand All @@ -220,13 +231,23 @@ impl Timestamp {
))
}

num = num * 10 + u16::from(i - b'0');
num = Some(num.unwrap_or(0) * 10 + u16::from(i - b'0'));
byte_count += 1;
}

let Some(parsed_num) = num else {
assert_ne!(
parse_mode,
ParsingMode::Strict,
"The timestamp segment is empty, the parser should've failed before this point."
);

return Ok(STOP_PARSING);
};

*content = &content[byte_count..];

Ok((num, byte_count))
Ok((parsed_num, byte_count))
}

pub(crate) fn verify(&self) -> Result<()> {
Expand Down Expand Up @@ -316,21 +337,83 @@ mod tests {
assert_eq!(timestamp.to_string().len(), 7);
}

fn broken_timestamps() -> [(&'static [u8], Timestamp); 7] {
[
(
b"2024-",
Timestamp {
year: 2024,
..Timestamp::default()
},
),
(
b"2024-06-",
Timestamp {
year: 2024,
month: Some(6),
..Timestamp::default()
},
),
(
b"2024--",
Timestamp {
year: 2024,
..Timestamp::default()
},
),
(
b"2024- -",
Timestamp {
year: 2024,
..Timestamp::default()
},
),
(
b"2024-06-03T",
Timestamp {
year: 2024,
month: Some(6),
day: Some(3),
..Timestamp::default()
},
),
(
b"2024:06",
Timestamp {
year: 2024,
..Timestamp::default()
},
),
(
b"2024-0-",
Timestamp {
year: 2024,
month: Some(0),
..Timestamp::default()
},
),
]
}

#[test_log::test]
fn reject_broken_timestamps() {
let broken_timestamps: &[&[u8]] = &[
b"2024-",
b"2024-06-",
b"2024--",
b"2024- -",
b"2024-06-03T",
b"2024:06",
b"2024-0-",
];
fn reject_broken_timestamps_strict() {
for (timestamp, _) in broken_timestamps() {
let parsed_timestamp = Timestamp::parse(&mut &timestamp[..], ParsingMode::Strict);
assert!(parsed_timestamp.is_err());
}
}

for timestamp in broken_timestamps {
#[test_log::test]
fn accept_broken_timestamps_best_attempt() {
for (timestamp, partial_result) in broken_timestamps() {
let parsed_timestamp = Timestamp::parse(&mut &timestamp[..], ParsingMode::BestAttempt);
assert!(parsed_timestamp.is_err());
assert!(parsed_timestamp.is_ok());
assert_eq!(
parsed_timestamp.unwrap(),
Some(partial_result),
"{}",
timestamp.escape_ascii()
);
}
}

Expand Down Expand Up @@ -466,4 +549,25 @@ mod tests {
assert_eq!(parsed_timestamp, Some(expected));
}
}

#[test_log::test]
fn timestamp_no_time_marker() {
let timestamp = "2024-06-03 14:08:49";

let parsed_timestamp_strict =
Timestamp::parse(&mut timestamp.as_bytes(), ParsingMode::Strict);
assert!(parsed_timestamp_strict.is_err());

let parsed_timestamp_best_attempt =
Timestamp::parse(&mut timestamp.as_bytes(), ParsingMode::BestAttempt).unwrap();
assert_eq!(
parsed_timestamp_best_attempt,
Some(Timestamp {
year: 2024,
month: Some(6),
day: Some(3),
..Timestamp::default()
})
);
}
}