Skip to content

Commit

Permalink
add a way to diff deepclone data
Browse files Browse the repository at this point in the history
  • Loading branch information
s3bk committed Nov 5, 2023
1 parent 0e411b4 commit f65d4c7
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 3 deletions.
109 changes: 108 additions & 1 deletion pdf/src/build.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::collections::HashMap;
use std::collections::HashSet;
use std::ops::Range;
use std::sync::Arc;

Expand Down Expand Up @@ -201,6 +202,10 @@ pub struct Importer<'a, R, U> {
shared: HashMap<usize, (AnySync, AnySync)>,
}

/// Old-to-new reference mapping produced by `Importer::finish`, kept together
/// with the resolver of the source document so the copy can be checked with `verify`.
pub struct ImporterMap<R> {
// Resolver taken over from the `Importer`; used to look up the *old* objects.
resolver: R,
// Maps each reference in the source document to the reference of its clone.
map: HashMap<PlainRef, PlainRef>,
}

impl<'a, R, U> Importer<'a, R, U> {
pub fn new(resolver: R, updater: &'a mut U) -> Self {
Expand All @@ -209,10 +214,109 @@ impl<'a, R, U> Importer<'a, R, U> {
updater,
map: Default::default(),
rcrefs: Default::default(),
shared: Default::default()
shared: Default::default(),
}
}
}
impl<'a, R: Resolve, U> Importer<'a, R, U> {
pub fn finish(self) -> ImporterMap<R> {
ImporterMap { resolver: self.resolver, map: self.map }
}
}
impl<R: Resolve> ImporterMap<R> {
    /// Compares two dictionaries entry by entry.
    ///
    /// `a_dict` belongs to the old document (resolved through `self.resolver`),
    /// `b_dict` to the new one (resolved through `new_resolve`).
    ///
    /// Returns `Ok(true)` only if both dictionaries have exactly the same keys
    /// and every pair of values compares equal via [`Self::compare_prim`].
    /// Every difference found is reported on stdout; the whole dictionary is
    /// always scanned so that all differences get printed.
    ///
    /// # Errors
    /// Propagates any error raised while comparing the values.
    fn compare_dict(&self, a_dict: &Dictionary, b_dict: &Dictionary, new_resolve: &impl Resolve) -> Result<bool> {
        let mut same = true;
        // Keys of `b` that have not been matched by a key of `a` yet.
        let mut b_unvisited: HashSet<_> = b_dict.keys().collect();
        for (a_key, a_val) in a_dict.iter() {
            if let Some(b_val) = b_dict.get(a_key) {
                if !self.compare_prim(a_val, b_val, new_resolve)? {
                    println!("value for key {a_key} mismatch.");
                    same = false;
                }
                b_unvisited.remove(a_key);
            } else {
                println!("missing key {a_key} in b.");
                same = false;
            }
        }
        for b_key in b_unvisited.iter() {
            println!("missing key {b_key} in a.");
        }
        // Leftover keys in `b` mean the dictionaries differ, so equality
        // requires the set of unvisited keys to be empty.
        Ok(same && b_unvisited.is_empty())
    }

    /// Structurally compares primitive `a` (old document) with `b` (new document).
    ///
    /// * Arrays must have equal length and pairwise equal elements.
    /// * Dictionaries are compared with [`Self::compare_dict`].
    /// * Two references are equal iff `self.map` maps the old one to the new one.
    /// * Streams must have equal info dictionaries and equal raw data.
    /// * An integer and a number are compared after converting the integer to `f32`.
    /// * A reference on only one side is resolved and the comparison is retried.
    /// * Anything else falls back to `==`.
    ///
    /// Mismatches are reported on stdout.
    ///
    /// # Errors
    /// Propagates errors from resolving references or reading stream data.
    fn compare_prim(&self, a: &Primitive, b: &Primitive, new_resolve: &impl Resolve) -> Result<bool> {
        match (a, b) {
            (Primitive::Array(a_parts), Primitive::Array(b_parts)) => {
                if a_parts.len() != b_parts.len() {
                    println!("different length {} vs. {}", a_parts.len(), b_parts.len());
                    println!("a = {a_parts:?}");
                    println!("b = {b_parts:?}");
                    return Ok(false);
                }
                for (a_item, b_item) in a_parts.iter().zip(b_parts.iter()) {
                    if !self.compare_prim(a_item, b_item, new_resolve)? {
                        return Ok(false);
                    }
                }
                Ok(true)
            }
            (Primitive::Dictionary(a_dict), Primitive::Dictionary(b_dict)) => {
                self.compare_dict(a_dict, b_dict, new_resolve)
            }
            (Primitive::Reference(r1), Primitive::Reference(r2)) => {
                // `r1` is already a borrow of the key; no extra `&` needed.
                match self.map.get(r1) {
                    Some(r) if r == r2 => Ok(true),
                    _ => Ok(false),
                }
            }
            (Primitive::Stream(a_s), Primitive::Stream(b_s)) => {
                if !self.compare_dict(&a_s.info, &b_s.info, new_resolve)? {
                    println!("stream dicts differ");
                    return Ok(false);
                }
                // Each side's data has to be fetched through its own resolver.
                let a_data = a_s.raw_data(&self.resolver)?;
                let b_data = b_s.raw_data(new_resolve)?;
                if a_data != b_data {
                    println!("data differs.");
                    return Ok(false);
                }
                Ok(true)
            }
            (Primitive::Integer(a), Primitive::Number(b)) => Ok(*a as f32 == *b),
            (Primitive::Number(a), Primitive::Integer(b)) => Ok(*a == *b as f32),
            (Primitive::Reference(a_ref), b) => {
                let a = self.resolver.resolve(*a_ref)?;
                self.compare_prim(&a, b, new_resolve)
            }
            (a, Primitive::Reference(b_ref)) => {
                let b = new_resolve.resolve(*b_ref)?;
                self.compare_prim(a, &b, new_resolve)
            }
            (a, b) => {
                if a == b {
                    Ok(true)
                } else {
                    println!("{a:?} != {b:?}");
                    Ok(false)
                }
            }
        }
    }

    /// Checks every `(old, new)` pair recorded in the map: the old object is
    /// resolved through `self.resolver`, the new one through `new_resolve`,
    /// and both are compared with [`Self::compare_prim`].
    ///
    /// All pairs are checked even after a mismatch so that every difference is
    /// reported. Returns `Ok(true)` iff all pairs compare equal.
    ///
    /// # Errors
    /// Propagates the first resolve or comparison error encountered.
    pub fn verify(&self, new_resolve: &impl Resolve) -> Result<bool> {
        let mut same = true;
        for (&old_ref, &new_ref) in self.map.iter() {
            let old = self.resolver.resolve(old_ref)?;
            let new = new_resolve.resolve(new_ref)?;

            if !self.compare_prim(&old, &new, new_resolve)? {
                same = false;
            }
        }
        Ok(same)
    }
}

impl<'a, R: Resolve, U> Resolve for Importer<'a, R, U> {
fn get<T: Object+datasize::DataSize>(&self, r: Ref<T>) -> Result<RcRef<T>> {
self.resolver.get(r)
Expand Down Expand Up @@ -257,6 +361,7 @@ impl<'a, R: Resolve, U: Updater> Cloner for Importer<'a, R, U> {

let r = self.updater.create(clone)?;
self.map.insert(old.get_inner(), r.get_ref().get_inner());

Ok(r.get_ref())
}
fn clone_plainref(&mut self, old: PlainRef) -> Result<PlainRef> {
Expand All @@ -270,6 +375,7 @@ impl<'a, R: Resolve, U: Updater> Cloner for Importer<'a, R, U> {
.get_ref().get_inner();

self.map.insert(old, new);

Ok(new)
}
fn clone_rcref<T: DeepClone + ObjectWrite + DataSize>(&mut self, old: &RcRef<T>) -> Result<RcRef<T>> {
Expand All @@ -283,6 +389,7 @@ impl<'a, R: Resolve, U: Updater> Cloner for Importer<'a, R, U> {
let new = self.updater.create::<T>(new)?;
self.rcrefs.insert(new.get_ref().get_inner(), AnySync::new(new.data().clone()));
self.map.insert(old_ref, new.get_ref().get_inner());

Ok(new)
}
fn clone_shared<T: DeepClone>(&mut self, old: &Shared<T>) -> Result<Shared<T>> {
Expand Down
3 changes: 3 additions & 0 deletions pdf/src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ where
/// Consumes `self` and returns its `backend` field.
pub fn into_inner(self) -> B {
self.backend
}
/// Returns a [`Resolve`] implementation that borrows `self`,
/// created with `StorageResolver::new(self)`.
pub fn resolver(&self) -> impl Resolve + '_ {
StorageResolver::new(self)
}
pub fn with_cache(backend: B, options: ParseOptions, object_cache: OC, stream_cache: SC, log: L) -> Result<Self> {
Ok(Storage {
start_offset: backend.locate_start_offset()?,
Expand Down
9 changes: 7 additions & 2 deletions pdf/src/object/color.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ impl Object for ColorSpace {
impl ColorSpace {
fn from_primitive_depth(p: Primitive, resolve: &impl Resolve, depth: usize) -> Result<ColorSpace> {
let p = p.resolve(resolve)?;

if let Ok(name) = p.as_name() {
let cs = match name {
"DeviceGray" => ColorSpace::DeviceGray,
Expand Down Expand Up @@ -163,8 +164,12 @@ impl ObjectWrite for ColorSpace {
ColorSpace::Indexed(ref base, hival, ref lookup) => {
let base = base.to_primitive(update)?;
let hival = Primitive::Integer(hival.into());
let lookup = Stream::new((), lookup.clone()).to_primitive(update)?;
Ok(Primitive::Array(vec![base, hival, lookup]))
let lookup = if lookup.len() < 100 {
PdfString::new((**lookup).into()).into()
} else {
Stream::new((), lookup.clone()).to_primitive(update)?
};
Ok(Primitive::Array(vec![Primitive::name("Indexed"), base, hival, lookup]))
}
ref p => {
dbg!(p);
Expand Down
2 changes: 2 additions & 0 deletions pdf/src/object/stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ impl<I: ObjectWrite> Stream<I> {
StreamFilter::LZWDecode(ref p) => Some(p.to_primitive(update)?),
StreamFilter::FlateDecode(ref p) => Some(p.to_primitive(update)?),
StreamFilter::DCTDecode(ref p) => Some(p.to_primitive(update)?),
StreamFilter::CCITTFaxDecode(ref p) => Some(p.to_primitive(update)?),
StreamFilter::JBIG2Decode(ref p) => Some(p.to_primitive(update)?),
_ => None
} {
assert!(params.is_none());
Expand Down
6 changes: 6 additions & 0 deletions pdf/src/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,12 @@ impl PdfStream {
writeln!(out, "\nendstream")?;
Ok(())
}
/// Returns the stream's raw data.
///
/// For a stream that lives in the file, the bytes are requested from
/// `resolve` via `stream_data` using the stored id and file range; for a
/// pending stream the stored buffer is returned as a shared handle.
///
/// # Errors
/// Propagates any error from `resolve.stream_data`.
pub fn raw_data(&self, resolve: &impl Resolve) -> Result<Arc<[u8]>> {
    match &self.inner {
        StreamInner::Pending { data } => Ok(Arc::clone(data)),
        StreamInner::InFile { id, file_range } => {
            let range = file_range.clone();
            resolve.stream_data(*id, range)
        }
    }
}
}
impl DeepClone for PdfStream {
fn deep_clone(&self, cloner: &mut impl Cloner) -> Result<Self> {
Expand Down

0 comments on commit f65d4c7

Please sign in to comment.