Skip to content

Commit

Permalink
Replace LinkedHashMap with IndexMap (#293)
Browse files Browse the repository at this point in the history
* Replace LinkedHashMap with IndexMap

* Adapt examples
  • Loading branch information
Heinenen authored Aug 9, 2024
1 parent 03997e5 commit 79f5b41
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 106 deletions.
11 changes: 5 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ chrono = { version = "^0.4", optional = true, features = [
encoding_rs = "0.8.32"
flate2 = "^1.0"
image = { version = "^0.24", optional = true }
indexmap = "2.2.3"
itoa = "^1.0"
linked-hash-map = "^0.5"
log = "^0.4"
md-5 = "0.10"
nom = { version = "^7.1", optional = true }
Expand All @@ -34,23 +34,22 @@ serde = { version = "1.0", features = ["derive"], optional = true }
time = { version = "^0.3", features = ["formatting", "parsing"] }
tokio = { version = "1", features = ["fs", "io-util"], optional = true }
weezl = "0.1"
indexmap = "2.2.3"

[dev-dependencies]
tempfile = "3.3"
serde_json = "1.0"
clap = { version = "4.0", features = ["derive"] }
shellexpand = "3.0"
env_logger = "0.10"
serde_json = "1.0"
shellexpand = "3.0"
tempfile = "3.3"

[features]
async = ["tokio/rt-multi-thread", "tokio/macros"]
chrono_time = ["chrono"]
default = ["chrono_time", "nom_parser", "rayon"]
embed_image = ["image"]
nom_parser = ["nom"]
pom_parser = ["pom"]
serde = ["dep:serde"]
async = ["tokio/rt-multi-thread", "tokio/macros"]

[[example]]
name = "add_barcode"
Expand Down
128 changes: 64 additions & 64 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,22 @@ use lopdf::dictionary;
use lopdf::{Document, Object, Stream};
use lopdf::content::{Content, Operation};

// with_version specifes the PDF version this document complies with.
// `with_version` specifies the PDF version this document complies with.
let mut doc = Document::with_version("1.5");
// Object IDs are used for cross referencing in PDF documents. `lopdf` helps keep track of them
// for us. They are simple integers.
// Calls to `doc.new_object_id` and `doc.add_object` return an object id
// Object IDs are used for cross referencing in PDF documents.
// `lopdf` helps keep track of them for us. They are simple integers.
// Calls to `doc.new_object_id` and `doc.add_object` return an object ID.

// pages is the root node of the page tree
// "Pages" is the root node of the page tree.
let pages_id = doc.new_object_id();

// fonts are dictionaries. The type, subtype and basefont tags
// are straight out of the PDF reference manual
// Fonts are dictionaries. The "Type", "Subtype" and "BaseFont" tags
// are straight out of the PDF spec.
//
// The dictionary macro is a helper that allows complex
// key, value relationships to be represented in a simpler
// key-value relationships to be represented in a simpler
// visual manner, similar to a match statement.
// Dictionary is linkedHashMap of byte vector, and object
// A dictionary is implemented as an IndexMap of Vec<u8>, and Object
let font_id = doc.add_object(dictionary! {
// type of dictionary
"Type" => "Font",
Expand All @@ -46,13 +46,13 @@ let font_id = doc.add_object(dictionary! {
"BaseFont" => "Courier",
});

// font dictionaries need to be added into resource dictionaries
// in order to be used.
// Font dictionaries need to be added into resource
// dictionaries in order to be used.
// Resource dictionaries can contain more than just fonts,
// but normally just contains fonts
// Only one resource dictionary is allowed per page tree root
// but normally just contains fonts.
// Only one resource dictionary is allowed per page tree root.
let resources_id = doc.add_object(dictionary! {
// fonts are actually triplely nested dictionaries. Fun!
// Fonts are actually triply nested dictionaries. Fun!
"Font" => dictionary! {
// F1 is the font name used when writing text.
// It must be unique in the document. It does not
Expand All @@ -61,75 +61,75 @@ let resources_id = doc.add_object(dictionary! {
},
});

// Content is a wrapper struct around an operations struct that contains a vector of operations
// The operations struct contains a vector of operations that match up with a particular PDF
// operator and operands.
// Reference the PDF reference for more details on these operators and operands.
// Note, the operators and operands are specified in a reverse order than they
// actually appear in the PDF file itself.
// `Content` is a wrapper struct around an operations struct that contains
// a vector of operations. The operations struct contains a vector of
// operations that match up with a particular PDF operator and operands.
// Refer to the PDF spec for more details on the operators and operands.
// Note, the operators and operands are specified in a reverse order
// from how they actually appear in the PDF file itself.
let content = Content {
operations: vec![
// BT begins a text element. it takes no operands
// BT begins a text element. It takes no operands.
Operation::new("BT", vec![]),
// Tf specifies the font and font size. Font scaling is complicated in PDFs. Reference
// the reference for more info.
// The into() methods are defined based on their paired .from() methods (this
// functionality is built into rust), and are converting the provided values into
// An enum that represents the basic object types in PDF documents.
// Tf specifies the font and font size.
// Font scaling is complicated in PDFs.
// Refer to the spec for more info.
// The `into()` methods convert the types into
// an enum that represents the basic object types in PDF documents.
Operation::new("Tf", vec!["F1".into(), 48.into()]),
// Td adjusts the translation components of the text matrix. When used for the first
// time after BT, it sets the initial text position on the page.
// Td adjusts the translation components of the text matrix.
// When used for the first time after BT, it sets the initial
// text position on the page.
// Note: PDF documents have Y=0 at the bottom. Thus 600 to print text near the top.
Operation::new("Td", vec![100.into(), 600.into()]),
// Tj prints a string literal to the page. By default, this is black text that is
// filled in. There are other operators that can produce various textual effects and
// colors
Operation::new("Tj", vec![Object::string_literal("Hello World!")]),
// ET ends the text element
// ET ends the text element.
Operation::new("ET", vec![]),
],
};

// Streams are a dictionary followed by a sequence of bytes. What that sequence of bytes
// represents depends on context
// The stream dictionary is set internally to lopdf and normally doesn't
// Streams are a dictionary followed by a (possibly encoded) sequence of bytes.
// What that sequence of bytes represents depends on the context.
// The stream dictionary is set internally by lopdf and normally doesn't
// need to be manually manipulated. It contains keys such as
// Length, Filter, DecodeParams, etc
//
// content is a stream of encoded content data.
// Length, Filter, DecodeParams, etc.
let content_id = doc.add_object(Stream::new(dictionary! {}, content.encode().unwrap()));

// Page is a dictionary that represents one page of a PDF file.
// It has a type, parent and contents
// Its required fields are "Type", "Parent" and "Contents".
let page_id = doc.add_object(dictionary! {
"Type" => "Page",
"Parent" => pages_id,
"Contents" => content_id,
});

// Again, pages is the root of the page tree. The ID was already created
// at the top of the page, since we needed it to assign to the parent element of the page
// dictionary
// Again, "Pages" is the root of the page tree. The ID was already created
// at the top of the page, since we needed it to assign to the parent element
// of the page dictionary.
//
// This is just the basic requirements for a page tree root object. There are also many
// additional entries that can be added to the dictionary if needed. Some of these can also be
// defined on the page dictionary itself, and not inherited from the page tree root.
// These are just the basic requirements for a page tree root object.
// There are also many additional entries that can be added to the dictionary,
// if needed. Some of these can also be defined on the page dictionary itself,
// and not inherited from the page tree root.
let pages = dictionary! {
// Type of dictionary
"Type" => "Pages",
// Vector of page IDs in document. Normally would contain more than one ID and be produced
// using a loop of some kind
// Vector of page IDs in document. Normally would contain more than one ID
// and be produced using a loop of some kind.
"Kids" => vec![page_id.into()],
// Page count
"Count" => 1,
// ID of resources dictionary, defined earlier
"Resources" => resources_id,
// a rectangle that defines the boundaries of the physical or digital media. This is the
// "Page Size"
// A rectangle that defines the boundaries of the physical or digital media.
// This is the "page size".
"MediaBox" => vec![0.into(), 0.into(), 595.into(), 842.into()],
};

// using insert() here, instead of add_object() since the id is already known.
// Using `insert()` here, instead of `add_object()` since the ID is already known.
doc.objects.insert(pages_id, Object::Dictionary(pages));

// Creating document catalog.
Expand All @@ -139,8 +139,8 @@ let catalog_id = doc.add_object(dictionary! {
"Pages" => pages_id,
});

// Root key in trailer is set here to ID of document catalog,
// remainder of trailer is set during doc.save().
// The "Root" key in trailer is set to the ID of the document catalog,
// the remainder of the trailer is set during `doc.save()`.
doc.trailer.set("Root", catalog_id);
doc.compress();

Expand Down Expand Up @@ -207,15 +207,15 @@ pub fn generate_fake_document() -> Document {
}

fn main() -> std::io::Result<()> {
// Generate a stack of Documents to merge
// Generate a stack of Documents to merge.
let documents = vec![
generate_fake_document(),
generate_fake_document(),
generate_fake_document(),
generate_fake_document(),
];

// Define a starting max_id (will be used as start index for object_ids)
// Define a starting `max_id` (will be used as start index for object_ids).
let mut max_id = 1;
let mut pagenum = 1;
// Collect all Documents Objects grouped by a map
Expand Down Expand Up @@ -251,17 +251,17 @@ fn main() -> std::io::Result<()> {
documents_objects.extend(doc.objects);
}

// Catalog and Pages are mandatory
// "Catalog" and "Pages" are mandatory.
let mut catalog_object: Option<(ObjectId, Object)> = None;
let mut pages_object: Option<(ObjectId, Object)> = None;

// Process all objects except "Page" type
for (object_id, object) in documents_objects.iter() {
// We have to ignore "Page" (as are processed later), "Outlines" and "Outline" objects
// All other objects should be collected and inserted into the main Document
// We have to ignore "Page" (as are processed later), "Outlines" and "Outline" objects.
// All other objects should be collected and inserted into the main Document.
match object.type_name().unwrap_or("") {
"Catalog" => {
// Collect a first "Catalog" object and use it for the future "Pages"
// Collect a first "Catalog" object and use it for the future "Pages".
catalog_object = Some((
if let Some((id, _)) = catalog_object {
id
Expand Down Expand Up @@ -301,7 +301,7 @@ fn main() -> std::io::Result<()> {
}
}

// If no "Pages" object found abort
// If no "Pages" object found, abort.
if pages_object.is_none() {
println!("Pages root not found.");

Expand All @@ -320,7 +320,7 @@ fn main() -> std::io::Result<()> {
}
}

// If no "Catalog" found abort
// If no "Catalog" found, abort.
if catalog_object.is_none() {
println!("Catalog root not found.");

Expand Down Expand Up @@ -370,10 +370,10 @@ fn main() -> std::io::Result<()> {
// Reorder all new Document objects
document.renumber_objects();

//Set any Bookmarks to the First child if they are not set to a page
// Set any Bookmarks to the First child if they are not set to a page
document.adjust_zero_pages();

//Set all bookmarks to the PDF Object tree then set the Outlines to the Bookmark content map.
// Set all bookmarks to the PDF Object tree then set the Outlines to the Bookmark content map.
if let Some(n) = document.build_outline() {
if let Ok(x) = document.get_object_mut(catalog_object.0) {
if let Object::Dictionary(ref mut dict) = x {
Expand All @@ -384,9 +384,9 @@ fn main() -> std::io::Result<()> {

document.compress();

// Save the merged PDF
// Save the merged PDF.
// Store file in current working directory.
// Note: Line is excluded when running tests
// Note: Line is excluded when running doc tests
if false {
document.save("merged.pdf").unwrap();
}
Expand Down Expand Up @@ -439,8 +439,8 @@ use lopdf::Document;

* Why does the library keep everything in memory as high-level objects until finally serializing the entire document?

Normally a PDF document won't be very large, ranging from tens of KB to hundreds of MB. Memory size is not a bottle neck for today's computer.
By keeping the whole document in memory, stream length can be pre-calculated, no need to use a reference object for the Length entry,
the resulting PDF file is smaller for distribution and faster for PDF consumers to process.
Normally, a PDF document won't be very large, ranging from tens of KB to hundreds of MB. Memory size is not a bottleneck for today's computers.
By keeping the whole document in memory, the stream length can be pre-calculated, no need to use a reference object for the Length entry.
The resulting PDF file is smaller for distribution and faster for PDF consumers to process.

Producing is a one-time effort, while consuming is many more.
Loading

0 comments on commit 79f5b41

Please sign in to comment.