Skip to content

Commit

Permalink
Add metadata methods to PDFDocument (#204)
Browse files Browse the repository at this point in the history
* UTF-8 encode strings

* Add unit tests for utf8Encode

* Add another utf8Encode test

* Supporting UTF-16 encoding of PDFHexStrings

* Add tests for PDFHexString and PDFString

* Add PDFDocument metadata methods

* Add doc comments to PDFDocument metadata methods

* Update scratchpad

* Update test apps
  • Loading branch information
Hopding authored Oct 1, 2019
1 parent adf0e57 commit 7e06c40
Show file tree
Hide file tree
Showing 14 changed files with 681 additions and 30 deletions.
9 changes: 9 additions & 0 deletions apps/node/tests/test1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,15 @@ const ipsumLines = [
export default async (assets: Assets) => {
const pdfDoc = await PDFDocument.create();

pdfDoc.setTitle('🥚 The Life of an Egg 🍳');
pdfDoc.setAuthor('Humpty Dumpty');
pdfDoc.setSubject('📘 An Epic Tale of Woe 📖');
pdfDoc.setKeywords(['eggs', 'wall', 'fall', 'king', 'horses', 'men']);
pdfDoc.setProducer('PDF App 9000 🤖');
pdfDoc.setCreator('PDF App 9000 🤖');
pdfDoc.setCreationDate(new Date('2018-06-24T01:58:37.228Z'));
pdfDoc.setModificationDate(new Date('2018-12-21T07:00:11.000Z'));

pdfDoc.registerFontkit(fontkit);

const timesRomanFont = await pdfDoc.embedFont(StandardFonts.TimesRoman);
Expand Down
9 changes: 9 additions & 0 deletions apps/rn/src/tests/test1.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@ const ipsumLines = [
export default async () => {
const pdfDoc = await PDFDocument.create();

pdfDoc.setTitle('🥚 The Life of an Egg 🍳');
pdfDoc.setAuthor('Humpty Dumpty');
pdfDoc.setSubject('📘 An Epic Tale of Woe 📖');
pdfDoc.setKeywords(['eggs', 'wall', 'fall', 'king', 'horses', 'men']);
pdfDoc.setProducer('PDF App 9000 🤖');
pdfDoc.setCreator('PDF App 9000 🤖');
pdfDoc.setCreationDate(new Date('2018-06-24T01:58:37.228Z'));
pdfDoc.setModificationDate(new Date('2018-12-21T07:00:11.000Z'));

pdfDoc.registerFontkit(fontkit);

const timesRomanFont = await pdfDoc.embedFont(StandardFonts.TimesRoman);
Expand Down
9 changes: 9 additions & 0 deletions apps/web/test1.html
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,15 @@

const pdfDoc = await PDFDocument.create();

pdfDoc.setTitle('🥚 The Life of an Egg 🍳');
pdfDoc.setAuthor('Humpty Dumpty');
pdfDoc.setSubject('📘 An Epic Tale of Woe 📖');
pdfDoc.setKeywords(['eggs', 'wall', 'fall', 'king', 'horses', 'men']);
pdfDoc.setProducer('PDF App 9000 🤖');
pdfDoc.setCreator('PDF App 9000 🤖');
pdfDoc.setCreationDate(new Date('2018-06-24T01:58:37.228Z'));
pdfDoc.setModificationDate(new Date('2018-12-21T07:00:11.000Z'));

pdfDoc.registerFontkit(fontkit);

const timesRomanFont = await pdfDoc.embedFont(StandardFonts.TimesRoman);
Expand Down
35 changes: 30 additions & 5 deletions scratchpad/index.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,40 @@
import fs from 'fs';
import { openPdf, Reader } from './open';

import { PDFDocument } from 'src/index';
import { PDFDocument, StandardFonts } from 'src/index';

(async () => {
const pdfDoc = await PDFDocument.load(
fs.readFileSync('assets/pdfs/normal.pdf'),
const pdfDoc = await PDFDocument.create();

const timesRomanFont = await pdfDoc.embedFont(StandardFonts.TimesRoman);
const helveticaFont = await pdfDoc.embedFont(StandardFonts.Helvetica);

const page = pdfDoc.addPage([500, 600]);

page.setFont(timesRomanFont);
page.drawText('The Life of an Egg', { x: 60, y: 500, size: 50 });
page.drawText('An Epic Tale of Woe', { x: 125, y: 460, size: 25 });

page.setFont(helveticaFont);
page.drawText(
[
'Humpty Dumpty sat on a wall',
'Humpty Dumpty had a great fall;',
`All the king's horses and all the king's men`,
`Couldn't put Humpty together again.`,
].join('\n'),
{ x: 75, y: 275, size: 20, lineHeight: 25 },
);
page.drawText('- Humpty Dumpty', { x: 250, y: 150, size: 20 });

console.log('Count:', pdfDoc.getPageCount());
pdfDoc.removePage(1);
pdfDoc.setTitle('🥚 The Life of an Egg 🍳');
pdfDoc.setAuthor('Humpty Dumpty');
pdfDoc.setSubject('📘 An Epic Tale of Woe 📖');
pdfDoc.setKeywords(['eggs', 'wall', 'fall', 'king', 'horses', 'men']);
pdfDoc.setProducer('PDF App 9000 🤖');
pdfDoc.setCreator('pdf-lib (https://github.com/Hopding/pdf-lib)');
pdfDoc.setCreationDate(new Date('2018-06-24T01:58:37.228Z'));
pdfDoc.setModificationDate(new Date('2019-12-21T07:00:11.000Z'));

const pdfBytes = await pdfDoc.save();

Expand Down
144 changes: 143 additions & 1 deletion src/api/PDFDocument.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,15 @@ import {
JpegEmbedder,
PDFCatalog,
PDFContext,
PDFDict,
PDFHexString,
PDFName,
PDFObjectCopier,
PDFPageLeaf,
PDFPageTree,
PDFParser,
PDFStreamWriter,
PDFString,
PDFWriter,
PngEmbedder,
StandardFontEmbedder,
Expand Down Expand Up @@ -182,6 +186,8 @@ export default class PDFDocument {
this.images = [];

if (!ignoreEncryption && this.isEncrypted) throw new EncryptedPDFError();

this.updateInfoDict();
}

/**
Expand All @@ -197,10 +203,123 @@ export default class PDFDocument {
this.fontkit = fontkit;
}

/**
 * Store a title in this document's metadata. Most PDF readers display the
 * title in their "Document Properties" section. For example:
 * ```js
 * pdfDoc.setTitle('🥚 The Life of an Egg 🍳')
 * ```
 * @param title The title of this document.
 */
setTitle(title: string): void {
  assertIs(title, 'title', ['string']);

  // The text is stored under the `Title` key of the info dictionary,
  // encoded as a hex string.
  const titleKey = PDFName.of('Title');
  const titleValue = PDFHexString.fromText(title);
  this.getInfoDict().set(titleKey, titleValue);
}

/**
 * Store an author in this document's metadata. Most PDF readers display the
 * author in their "Document Properties" section. For example:
 * ```js
 * pdfDoc.setAuthor('Humpty Dumpty')
 * ```
 * @param author The author of this document.
 */
setAuthor(author: string): void {
  assertIs(author, 'author', ['string']);

  // The text is stored under the `Author` key of the info dictionary,
  // encoded as a hex string.
  const authorKey = PDFName.of('Author');
  const authorValue = PDFHexString.fromText(author);
  this.getInfoDict().set(authorKey, authorValue);
}

/**
 * Set this document's subject metadata. The subject will appear in the
 * "Document Properties" section of most PDF readers. For example:
 * ```js
 * pdfDoc.setSubject('📘 An Epic Tale of Woe 📖')
 * ```
 * @param subject The subject of this document.
 */
setSubject(subject: string): void {
  // The second argument is the label handed to `assertIs` for this value
  // (presumably used when reporting a failed check), so it must name the
  // `subject` parameter rather than another setter's parameter.
  assertIs(subject, 'subject', ['string']);

  // The text is stored under the `Subject` key of the info dictionary,
  // encoded as a hex string.
  const key = PDFName.of('Subject');
  this.getInfoDict().set(key, PDFHexString.fromText(subject));
}

/**
 * Store keywords in this document's metadata. Most PDF readers display the
 * keywords in their "Document Properties" section. For example:
 * ```js
 * pdfDoc.setKeywords(['eggs', 'wall', 'fall', 'king', 'horses', 'men'])
 * ```
 * @param keywords An array of keywords associated with this document.
 */
setKeywords(keywords: string[]): void {
  assertIs(keywords, 'keywords', [Array]);

  // The keywords are flattened into one space-separated string and stored
  // under the `Keywords` key of the info dictionary.
  const keywordsKey = PDFName.of('Keywords');
  const joinedKeywords = keywords.join(' ');
  this.getInfoDict().set(keywordsKey, PDFHexString.fromText(joinedKeywords));
}

/**
 * Store a creator in this document's metadata. Most PDF readers display the
 * creator in their "Document Properties" section. For example:
 * ```js
 * pdfDoc.setCreator('PDF App 9000 🤖')
 * ```
 * @param creator The creator of this document.
 */
setCreator(creator: string): void {
  assertIs(creator, 'creator', ['string']);

  // The text is stored under the `Creator` key of the info dictionary,
  // encoded as a hex string.
  const creatorKey = PDFName.of('Creator');
  const creatorValue = PDFHexString.fromText(creator);
  this.getInfoDict().set(creatorKey, creatorValue);
}

/**
 * Set this document's producer metadata. The producer will appear in the
 * "Document Properties" section of most PDF readers. For example:
 * ```js
 * pdfDoc.setProducer('PDF App 9000 🤖')
 * ```
 * @param producer The producer of this document.
 */
setProducer(producer: string): void {
  // The second argument is the label handed to `assertIs` for this value
  // (presumably used when reporting a failed check), so it must name the
  // `producer` parameter rather than another setter's parameter.
  assertIs(producer, 'producer', ['string']);

  // The text is stored under the `Producer` key of the info dictionary,
  // encoded as a hex string.
  const key = PDFName.of('Producer');
  this.getInfoDict().set(key, PDFHexString.fromText(producer));
}

/**
 * Store a creation date in this document's metadata. Most PDF readers
 * display the creation date in their "Document Properties" section. For
 * example:
 * ```js
 * pdfDoc.setCreationDate(new Date())
 * ```
 * @param creationDate The date this document was created.
 */
setCreationDate(creationDate: Date): void {
  assertIs(creationDate, 'creationDate', [[Date, 'Date']]);

  // The date is stored under the `CreationDate` key of the info dictionary,
  // formatted as a PDF date string.
  const creationDateKey = PDFName.of('CreationDate');
  const creationDateValue = PDFString.fromDate(creationDate);
  this.getInfoDict().set(creationDateKey, creationDateValue);
}

/**
 * Store a modification date in this document's metadata. Most PDF readers
 * display the modification date in their "Document Properties" section. For
 * example:
 * ```js
 * pdfDoc.setModificationDate(new Date())
 * ```
 * @param modificationDate The date this document was last modified.
 */
setModificationDate(modificationDate: Date): void {
  assertIs(modificationDate, 'modificationDate', [[Date, 'Date']]);

  // The date is stored under the `ModDate` key of the info dictionary,
  // formatted as a PDF date string.
  const modDateKey = PDFName.of('ModDate');
  const modDateValue = PDFString.fromDate(modificationDate);
  this.getInfoDict().set(modDateKey, modDateValue);
}

/**
* Get the number of pages contained in this document. For example:
* ```js
* const totalPages = pdfDoc.getPageCount();
* const totalPages = pdfDoc.getPageCount()
* ```
* @returns The number of pages in this document.
*/
Expand Down Expand Up @@ -639,6 +758,29 @@ export default class PDFDocument {
return dataUri ? `data:application/pdf;base64,${base64}` : base64;
}

/**
 * Stamp this document's info dictionary with pdf-lib's identity and the
 * current time. `Producer` and `ModDate` are always overwritten, while
 * `Creator` and `CreationDate` are only filled in when the info dictionary
 * has no value for them yet.
 */
private updateInfoDict(): void {
  const libraryTag = `pdf-lib (https://github.com/Hopding/pdf-lib)`;
  const timestamp = new Date();

  const infoDict = this.getInfoDict();
  const isMissing = (entry: string) => !infoDict.get(PDFName.of(entry));

  // Always record who produced this version of the document, and when.
  this.setProducer(libraryTag);
  this.setModificationDate(timestamp);

  // Preserve any creator / creation date the info dictionary already holds.
  if (isMissing('Creator')) {
    this.setCreator(libraryTag);
  }
  if (isMissing('CreationDate')) {
    this.setCreationDate(timestamp);
  }
}

/**
 * Fetch the info dictionary referenced by the trailer's `Info` entry. When
 * that lookup does not yield a `PDFDict`, an empty dictionary is created,
 * registered with the context, and installed as the trailer's `Info` entry.
 * @returns The existing or newly created info dictionary.
 */
private getInfoDict(): PDFDict {
  const current = this.context.lookup(this.context.trailerInfo.Info);

  if (!(current instanceof PDFDict)) {
    // Nothing usable is referenced yet: create and register a fresh dict.
    const created = this.context.obj({});
    this.context.trailerInfo.Info = this.context.register(created);
    return created;
  }

  return current;
}

private assertFontkit(): Fontkit {
if (!this.fontkit) throw new FontkitNotRegisteredError();
return this.fontkit;
Expand Down
1 change: 1 addition & 0 deletions src/api/PDFPage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,7 @@ export default class PDFPage {

/**
* Change the default position of this page to be further right on the x-axis.
* For example:
* ```js
* page.moveTo(50, 50)
* page.drawText('I will be drawn at (50, 50)')
Expand Down
32 changes: 10 additions & 22 deletions src/core/embedders/CMap.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import { Glyph } from 'src/types/fontkit';

import { toHexStringOfMinLength } from 'src/utils';
import { toHexString, toHexStringOfMinLength } from 'src/utils';
import {
hasSurrogates,
highSurrogate,
isWithinBMP,
lowSurrogate,
} from 'src/utils/unicode';

/** [[start, end], mappings] */
type BfRange = [[string, string], string[]];
Expand Down Expand Up @@ -76,33 +82,15 @@ const cmapHexFormat = (...values: string[]) => `<${values.join('')}>`;
const cmapHexString = (value: number) => toHexStringOfMinLength(value, 4);

const cmapCodePointFormat = (codePoint: number) => {
if (isUtf8CodePoint(codePoint)) return cmapHexString(codePoint);
if (isWithinBMP(codePoint)) return cmapHexString(codePoint);

if (isUtf16CodePoint(codePoint)) {
if (hasSurrogates(codePoint)) {
const hs = highSurrogate(codePoint);
const ls = lowSurrogate(codePoint);
return `${cmapHexString(hs)}${cmapHexString(ls)}`;
}

const hex = codePoint.toString(16);
const hex = toHexString(codePoint);
const msg = `0x${hex} is not a valid UTF-8 or UTF-16 codepoint.`;
throw new Error(msg);
};

// From: https://en.wikipedia.org/wiki/UTF-16#Description
const isUtf8CodePoint = (codePoint: number) =>
codePoint >= 0 && codePoint <= 0xffff;

// From: https://en.wikipedia.org/wiki/UTF-16#Description
const isUtf16CodePoint = (codePoint: number) =>
codePoint >= 0x010000 && codePoint <= 0x10ffff;

// From Unicode 3.0 spec, section 3.7:
// http://unicode.org/versions/Unicode3.0.0/ch03.pdf
const highSurrogate = (codePoint: number) =>
Math.floor((codePoint - 0x10000) / 0x400) + 0xd800;

// From Unicode 3.0 spec, section 3.7:
// http://unicode.org/versions/Unicode3.0.0/ch03.pdf
const lowSurrogate = (codePoint: number) =>
((codePoint - 0x10000) % 0x400) + 0xdc00;
17 changes: 16 additions & 1 deletion src/core/objects/PDFHexString.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
import PDFObject from 'src/core/objects/PDFObject';
import CharCodes from 'src/core/syntax/CharCodes';
import { copyStringIntoBuffer } from 'src/utils';
import {
copyStringIntoBuffer,
toHexStringOfMinLength,
utf16Encode,
} from 'src/utils';

class PDFHexString extends PDFObject {
static of = (value: string) => new PDFHexString(value);

// Build a hex string from text: the text is passed through `utf16Encode`,
// and each resulting element is written as hex via
// `toHexStringOfMinLength(..., 4)`, concatenated in order.
static fromText = (value: string) => {
  const codeUnits = utf16Encode(value);

  const hexParts: string[] = [];
  for (let i = 0; i < codeUnits.length; i++) {
    hexParts.push(toHexStringOfMinLength(codeUnits[i], 4));
  }

  return new PDFHexString(hexParts.join(''));
};

private readonly value: string;

constructor(value: string) {
Expand Down
12 changes: 11 additions & 1 deletion src/core/objects/PDFString.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,23 @@
import PDFObject from 'src/core/objects/PDFObject';
import CharCodes from 'src/core/syntax/CharCodes';
import { copyStringIntoBuffer } from 'src/utils';
import { copyStringIntoBuffer, padStart } from 'src/utils';

class PDFString extends PDFObject {
// The PDF spec allows newlines and parens to appear directly within a literal
// string. These characters _may_ be escaped. But they do not _have_ to be. So
// for simplicity, we will not bother escaping them.
static of = (value: string) => new PDFString(value);

// Build a literal string of the form `D:YYYYMMDDHHmmSSZ` from the UTC
// components of `date`. The year is zero-padded to 4 characters and every
// other component to 2 characters.
static fromDate = (date: Date) => {
  const twoDigits = (component: number) => padStart(String(component), 2, '0');

  const stamp = [
    padStart(String(date.getUTCFullYear()), 4, '0'),
    twoDigits(date.getUTCMonth() + 1), // getUTCMonth() is zero-based
    twoDigits(date.getUTCDate()),
    twoDigits(date.getUTCHours()),
    twoDigits(date.getUTCMinutes()),
    twoDigits(date.getUTCSeconds()),
  ].join('');

  return new PDFString(`D:${stamp}Z`);
};

private readonly value: string;

private constructor(value: string) {
Expand Down
1 change: 1 addition & 0 deletions src/utils/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
export * from 'src/utils/arrays';
export * from 'src/utils/async';
export * from 'src/utils/strings';
export * from 'src/utils/unicode';
export * from 'src/utils/numbers';
export * from 'src/utils/errors';
export * from 'src/utils/base64';
Expand Down
Loading

0 comments on commit 7e06c40

Please sign in to comment.