Skip to content

Commit

Permalink
Allow parsing off-spec PDF files with prefixes before the header (#362)
Browse files: browse the repository at this point in the history
  • Loading branch information
gmalette authored Dec 13, 2024
1 parent ccf6ae9 commit d0874c3
Showing 1 changed file with 27 additions and 2 deletions.
29 changes: 27 additions & 2 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::collections::HashSet;
use std::str::{self, FromStr};

use nom::branch::alt;
use nom::bytes::complete::{tag, take, take_while, take_while1, take_while_m_n};
use nom::bytes::complete::{tag, take, take_until, take_while, take_while1, take_while_m_n};
use nom::character::complete::multispace1;
use nom::character::complete::{digit0, digit1, one_of};
use nom::character::complete::{space0, space1};
Expand Down Expand Up @@ -416,7 +416,16 @@ fn _indirect_object<'a>(
pub fn header(input: ParserInput) -> Option<String> {
strip_nom(map_res(
delimited(
tag(b"%PDF-"),
tuple((
map_res(take_until("%PDF-"), |v: ParserInput| {
if v.len() > 1024 {
return Err("Header prefix too long");
}

Ok(v)
}),
tag(b"%PDF-"),
)),
take_while(|c: u8| !b"\r\n".contains(&c)),
pair(eol, many0_count(comment)),
),
Expand Down Expand Up @@ -632,6 +641,22 @@ mod tests {
assert_eq!(real(test_span(b"10.")), Some(10.0));
}

#[test]
fn parses_malformed_header_with_prefix() {
    // Builds `prefix_len` space bytes followed by a newline-terminated
    // `%PDF-1.4` header. Every generated stream ends with the same `\n`
    // terminator, so the prefix length is the only thing that differs
    // between the accepted and the rejected inputs below.
    let padded_header = |prefix_len: usize| -> Vec<u8> {
        let mut bytes = vec![b' '; prefix_len];
        bytes.extend_from_slice(b"%PDF-1.4\n");
        bytes
    };

    // A short prefix of spaces and line feeds before the marker is tolerated.
    let stream = b"\x20\x20\x20\x20\x0A\x0A\x20\x20%PDF-1.4\n";
    assert_eq!(header(test_span(stream)), Some("1.4".to_string()));

    // 1024 bytes prefix: the largest prefix the parser accepts
    // (it rejects only when the prefix length is greater than 1024).
    let stream_with_longest_prefix = padded_header(1024);
    assert_eq!(
        header(test_span(&stream_with_longest_prefix)),
        Some("1.4".to_string())
    );

    // 1025 bytes prefix: one byte over the limit must be rejected.
    let stream_with_prefix_too_long = padded_header(1025);
    assert_eq!(header(test_span(&stream_with_prefix_too_long)), None);
}

#[test]
fn parse_string() {
let literal_string = |i| tstrip(literal_string(i));
Expand Down

0 comments on commit d0874c3

Please sign in to comment.