Skip to content

Commit

Permalink
Detect reference cycles when parsing streams
Browse files (browse the repository at this point in the history)
  • Loading branch information
Heinenen committed Aug 12, 2024
1 parent cb0bd47 commit c2b8287
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 22 deletions.
27 changes: 17 additions & 10 deletions src/nom_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::content::*;
use crate::error::XrefError;
use crate::xref::*;
use crate::Error;
use std::collections::HashSet;
use std::str::{self, FromStr};

use nom::branch::alt;
Expand Down Expand Up @@ -270,12 +271,12 @@ fn dictionary(input: &[u8]) -> NomResult<Dictionary> {
)(input)
}

fn stream<'a>(input: &'a [u8], reader: &Reader) -> NomResult<'a, Object> {
fn stream<'a>(input: &'a [u8], reader: &Reader, already_seen: &mut HashSet<ObjectId>) -> NomResult<'a, Object> {
let (i, dict) = terminated(dictionary, tuple((space, tag(b"stream"), eol)))(input)?;

if let Ok(length) = dict.get(b"Length").and_then(|value| {
if let Ok(id) = value.as_reference() {
reader.get_object(id).and_then(|value| value.as_i64())
reader.get_object(id, already_seen).and_then(|value| value.as_i64())
} else {
value.as_i64()
}
Expand Down Expand Up @@ -326,22 +327,25 @@ pub fn direct_object(input: &[u8]) -> Option<Object> {
strip_nom(_direct_object(input))
}

// NOTE(review): this is a flattened diff hunk. The first signature/body pair
// (without `already_seen`) appears to be the removed version and the second
// pair the added one — confirm against the actual commit.
/// Parses a single object from `input`.
///
/// Tries the `stream` parser first and falls back to `_direct_objects`; the
/// chosen parser must then be followed by `space`, whose output is discarded.
/// `reader` (and, in the second version, `already_seen`) is only forwarded to
/// `stream`.
fn object<'a>(input: &'a [u8], reader: &Reader) -> NomResult<'a, Object> {
terminated(alt((|input| stream(input, reader), _direct_objects)), space)(input)
fn object<'a>(input: &'a [u8], reader: &Reader, already_seen: &mut HashSet<ObjectId>) -> NomResult<'a, Object> {
terminated(
alt((|input| stream(input, reader, already_seen), _direct_objects)),
space,
)(input)
}

// NOTE(review): this is a flattened diff hunk. The parameter line and call
// without `already_seen` appear to be the removed version, the ones with it
// the added version — confirm against the actual commit.
/// Parses the indirect object that starts at `offset` within `input`.
///
/// Delegates to `_indirect_object` on `&input[offset..]`, then calls
/// `offset_stream(&mut object, offset)` on the parsed object before returning
/// it together with its id. `expected_id`, `reader` and (in the second
/// version) `already_seen` are passed through unchanged.
///
/// # Errors
/// Propagates any error returned by `_indirect_object`.
pub fn indirect_object(
input: &[u8], offset: usize, expected_id: Option<ObjectId>, reader: &Reader,
input: &[u8], offset: usize, expected_id: Option<ObjectId>, reader: &Reader, already_seen: &mut HashSet<ObjectId>,
) -> crate::Result<(ObjectId, Object)> {
// Slicing panics if `offset > input.len()`; the `Reader::read_object` caller
// visible in this commit checks that bound first — verify for other callers.
let (id, mut object) = _indirect_object(&input[offset..], offset, expected_id, reader)?;
let (id, mut object) = _indirect_object(&input[offset..], offset, expected_id, reader, already_seen)?;

offset_stream(&mut object, offset);

Ok((id, object))
}

fn _indirect_object(
input: &[u8], offset: usize, expected_id: Option<ObjectId>, reader: &Reader,
input: &[u8], offset: usize, expected_id: Option<ObjectId>, reader: &Reader, already_seen: &mut HashSet<ObjectId>,
) -> crate::Result<(ObjectId, Object)> {
let (i, (_, object_id)) =
terminated(tuple((space, object_id)), pair(tag(b"obj"), space))(input).map_err(|_| Error::Parse { offset })?;
Expand All @@ -352,8 +356,11 @@ fn _indirect_object(
}

let object_offset = input.len() - i.len();
let (_, mut object) = terminated(|i| object(i, reader), tuple((space, opt(tag(b"endobj")), space)))(i)
.map_err(|_| Error::Parse { offset })?;
let (_, mut object) = terminated(
|i| object(i, reader, already_seen),
tuple((space, opt(tag(b"endobj")), space)),
)(i)
.map_err(|_| Error::Parse { offset })?;

offset_stream(&mut object, object_offset);

Expand Down Expand Up @@ -418,7 +425,7 @@ pub fn xref_and_trailer(input: &[u8], reader: &Reader) -> crate::Result<(Xref, D
Ok((xref, trailer))
}),
(|input| {
_indirect_object(input, 0, None, reader)
_indirect_object(input, 0, None, reader, &mut HashSet::new())
.map(|(_, obj)| {
let res = match obj {
Object::Stream(stream) => decode_xref_stream(stream),
Expand Down
41 changes: 29 additions & 12 deletions src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

use log::{error, warn};
use std::cmp;
use std::collections::BTreeMap;
use std::collections::{BTreeMap, HashSet};
use std::convert::TryInto;
#[cfg(not(feature = "async"))]
use std::fs::File;
Expand Down Expand Up @@ -273,7 +273,7 @@ impl<'a> Reader<'a> {
let entries_filter_map = |(_, entry): (&_, &_)| {
if let XrefEntry::Normal { offset, .. } = *entry {
let (object_id, mut object) = self
.read_object(offset as usize, None)
.read_object(offset as usize, None, &mut HashSet::new())
.map_err(|e| error!("Object load error: {:?}", e))
.ok()?;
if let Some(filter_func) = filter_func {
Expand Down Expand Up @@ -363,12 +363,22 @@ impl<'a> Reader<'a> {
let object = self.document.get_object(object_id)?;
let stream = object.as_stream()?;

stream.dict.get(b"Length").and_then(|value| {
if let Ok(id) = value.as_reference() {
return self.document.get_object(id).and_then(Object::as_i64);
}
value.as_i64()
})
stream
.dict
.get(b"Length")
.and_then(|value| {
if let Ok(id) = value.as_reference() {
return self.document.get_object(id).and_then(Object::as_i64);
}
value.as_i64()
})
.map_err(|err| {
error!(
"Stream dictionary of '{} {} R' is missing the Length entry",
object_id.0, object_id.1
);
err
})
}

/// Get object offset by object id.
Expand All @@ -386,19 +396,26 @@ impl<'a> Reader<'a> {
}
}

// NOTE(review): this is a flattened diff hunk. The signature and `read_object`
// call without `already_seen` appear to be the removed version, the ones with
// it (plus the cycle check) the added version — confirm against the commit.
/// Loads the object identified by `id`: looks up its offset with
/// `get_offset`, then parses it with `read_object`, passing `Some(id)` as the
/// expected id.
///
/// In the version taking `already_seen`, the set holds the ids requested so
/// far by the caller's resolution. If `id` is already present, a warning is
/// logged and `Error::ReferenceCycle` is returned; otherwise `id` is inserted
/// before the object is read, and the same set is handed on to `read_object`.
///
/// # Errors
/// Returns `Error::ReferenceCycle` for a repeated id, and propagates errors
/// from `get_offset` and `read_object`.
pub fn get_object(&self, id: ObjectId) -> Result<Object> {
pub fn get_object(&self, id: ObjectId, already_seen: &mut HashSet<ObjectId>) -> Result<Object> {
// Seeing the same id twice means resolving it would loop back onto itself.
if already_seen.contains(&id) {
warn!("reference cycle detected resolving object {} {}", id.0, id.1);
return Err(Error::ReferenceCycle);
}
// Record the id before parsing so nested lookups can detect the cycle.
already_seen.insert(id);
let offset = self.get_offset(id)?;
let (_, obj) = self.read_object(offset as usize, Some(id))?;
let (_, obj) = self.read_object(offset as usize, Some(id), already_seen)?;

Ok(obj)
}

// NOTE(review): this is a flattened diff hunk. The one-line signature and the
// `parser::indirect_object` call without `already_seen` appear to be the
// removed version, the multi-line signature and the call with it the added
// version — confirm against the actual commit.
/// Parses the indirect object located at `offset` in `self.buffer`.
///
/// Delegates to `parser::indirect_object`, passing `self` as the reader and
/// forwarding `expected_id` (and, in the second version, `already_seen`).
///
/// # Errors
/// Returns `Error::Offset(offset)` when `offset` is greater than the buffer
/// length, and propagates any error from `parser::indirect_object`.
fn read_object(&self, offset: usize, expected_id: Option<ObjectId>) -> Result<(ObjectId, Object)> {
fn read_object(
&self, offset: usize, expected_id: Option<ObjectId>, already_seen: &mut HashSet<ObjectId>,
) -> Result<(ObjectId, Object)> {
// Reject offsets past the end of the buffer before handing it to the parser;
// `offset == len` is allowed through.
if offset > self.buffer.len() {
return Err(Error::Offset(offset));
}

parser::indirect_object(self.buffer, offset, expected_id, self)
parser::indirect_object(self.buffer, offset, expected_id, self, already_seen)
}

fn get_xref_start(buffer: &[u8]) -> Result<usize> {
Expand Down

0 comments on commit c2b8287

Please sign in to comment.