Skip to content

Commit

Permalink
Decompress when it's possible images in using DecompressionStream
Browse files Browse the repository at this point in the history
Getting images is already asynchronous, so we can use this opportunity
to use DecompressStream (which is async too) to decompress images.
  • Loading branch information
calixteman committed Jun 2, 2024
1 parent 53dfb5a commit 9654ad5
Show file tree
Hide file tree
Showing 7 changed files with 148 additions and 29 deletions.
21 changes: 21 additions & 0 deletions src/core/base_stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,27 @@ class BaseStream {
unreachable("Abstract method `getBytes` called");
}

/**
* NOTE: This method can only be used to get image-data that is guaranteed
* to be fully loaded, since otherwise intermittent errors may occur;
* note the `ObjectLoader` class.
*/
async getImageData(length, ignoreColorSpace) {
return this.getBytes(length, ignoreColorSpace);
}

async asyncGetBytes() {
unreachable("Abstract method `asyncGetBytes` called");
}

get isAsync() {
return false;
}

get canAsyncDecodeImageFromBuffer() {
return false;
}

peekByte() {
const peekedByte = this.getByte();
if (peekedByte !== -1) {
Expand Down
8 changes: 8 additions & 0 deletions src/core/decode_stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,14 @@ class DecodeStream extends BaseStream {
return this.buffer.subarray(pos, end);
}

async getImageData(length, ignoreColorSpace = false) {
if (!this.canAsyncDecodeImageFromBuffer) {
return this.getBytes(length, ignoreColorSpace);
}
const data = await this.stream.asyncGetBytes();
return this.decodeImage(data, ignoreColorSpace);
}

reset() {
this.pos = 0;
}
Expand Down
52 changes: 52 additions & 0 deletions src/core/flate_stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import { FormatError, info } from "../shared/util.js";
import { DecodeStream } from "./decode_stream.js";
import { Stream } from "./stream.js";

const codeLenCodeMap = new Int32Array([
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
Expand Down Expand Up @@ -148,6 +149,57 @@ class FlateStream extends DecodeStream {
this.codeBuf = 0;
}

async getImageData(length, _ignoreColorSpace) {
const data = await this.asyncGetBytes();
return data?.subarray(0, length) || this.getBytes(length);
}

async asyncGetBytes() {
this.str.reset();
const bytes = this.str.getBytes();

try {
const { readable, writable } = new DecompressionStream("deflate");
const writer = writable.getWriter();
writer.write(bytes);
writer.close();

const chunks = [];
let totalLength = 0;

for await (const chunk of readable) {
chunks.push(chunk);
totalLength += chunk.byteLength;
}
const data = new Uint8Array(totalLength);
let offset = 0;
for (const chunk of chunks) {
data.set(chunk, offset);
offset += chunk.byteLength;
}

return data;
} catch {
// DecompressionStream failed (for example because there are some extra
// bytes after the end of the compressed data), so we fallback to our
// decoder.
// We already get the bytes from the underlying stream, so we just reuse
// them to avoid get them again.
this.str = new Stream(
bytes,
2 /* = header size (see ctor) */,
bytes.length,
this.str.dict
);
this.reset();
return null;
}
}

get isAsync() {
return true;
}

getBits(bits) {
const str = this.str;
let codeSize = this.codeSize;
Expand Down
29 changes: 17 additions & 12 deletions src/core/image.js
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ class PDFImage {
return output;
}

fillOpacity(rgbaBuf, width, height, actualHeight, image) {
async fillOpacity(rgbaBuf, width, height, actualHeight, image) {
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
assert(
rgbaBuf instanceof Uint8ClampedArray,
Expand All @@ -580,7 +580,7 @@ class PDFImage {
sw = smask.width;
sh = smask.height;
alphaBuf = new Uint8ClampedArray(sw * sh);
smask.fillGrayBuffer(alphaBuf);
await smask.fillGrayBuffer(alphaBuf);
if (sw !== width || sh !== height) {
alphaBuf = resizeImageMask(alphaBuf, smask.bpc, sw, sh, width, height);
}
Expand All @@ -590,7 +590,7 @@ class PDFImage {
sh = mask.height;
alphaBuf = new Uint8ClampedArray(sw * sh);
mask.numComps = 1;
mask.fillGrayBuffer(alphaBuf);
await mask.fillGrayBuffer(alphaBuf);

// Need to invert values in rgbaBuf
for (i = 0, ii = sw * sh; i < ii; ++i) {
Expand Down Expand Up @@ -716,7 +716,7 @@ class PDFImage {
drawWidth === originalWidth &&
drawHeight === originalHeight
) {
const data = this.getImageBytes(originalHeight * rowBytes, {});
const data = await this.getImageBytes(originalHeight * rowBytes, {});
if (isOffscreenCanvasSupported) {
if (mustBeResized) {
return ImageResizer.createImage(
Expand Down Expand Up @@ -774,7 +774,7 @@ class PDFImage {
}

if (isHandled) {
const rgba = this.getImageBytes(imageLength, {
const rgba = await this.getImageBytes(imageLength, {
drawWidth,
drawHeight,
forceRGBA: true,
Expand All @@ -794,7 +794,7 @@ class PDFImage {
case "DeviceRGB":
case "DeviceCMYK":
imgData.kind = ImageKind.RGB_24BPP;
imgData.data = this.getImageBytes(imageLength, {
imgData.data = await this.getImageBytes(imageLength, {
drawWidth,
drawHeight,
forceRGB: true,
Expand All @@ -809,7 +809,7 @@ class PDFImage {
}
}

const imgArray = this.getImageBytes(originalHeight * rowBytes, {
const imgArray = await this.getImageBytes(originalHeight * rowBytes, {
internal: true,
});
// imgArray can be incomplete (e.g. after CCITT fax encoding).
Expand Down Expand Up @@ -852,7 +852,7 @@ class PDFImage {
maybeUndoPreblend = true;

// Color key masking (opacity) must be performed before decoding.
this.fillOpacity(data, drawWidth, drawHeight, actualHeight, comps);
await this.fillOpacity(data, drawWidth, drawHeight, actualHeight, comps);
}

if (this.needsDecode) {
Expand Down Expand Up @@ -893,7 +893,7 @@ class PDFImage {
return imgData;
}

fillGrayBuffer(buffer) {
async fillGrayBuffer(buffer) {
if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
assert(
buffer instanceof Uint8ClampedArray,
Expand All @@ -913,7 +913,9 @@ class PDFImage {

// rows start at byte boundary
const rowBytes = (width * numComps * bpc + 7) >> 3;
const imgArray = this.getImageBytes(height * rowBytes, { internal: true });
const imgArray = await this.getImageBytes(height * rowBytes, {
internal: true,
});

const comps = this.getComponents(imgArray);
let i, length;
Expand Down Expand Up @@ -975,7 +977,7 @@ class PDFImage {
};
}

getImageBytes(
async getImageBytes(
length,
{
drawWidth,
Expand All @@ -990,7 +992,10 @@ class PDFImage {
this.image.drawHeight = drawHeight || this.height;
this.image.forceRGBA = !!forceRGBA;
this.image.forceRGB = !!forceRGB;
const imageBytes = this.image.getBytes(length, this.ignoreColorSpace);
const imageBytes = await this.image.getImageData(
length,
this.ignoreColorSpace
);

// If imageBytes came from a DecodeStream, we're safe to transfer it
// (and thus detach its underlying buffer) because it will constitute
Expand Down
15 changes: 13 additions & 2 deletions src/core/jbig2_stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,14 @@ class Jbig2Stream extends DecodeStream {
}

readBlock() {
this.decodeImage();
}

decodeImage(bytes) {
if (this.eof) {
return;
return this.buffer;
}
bytes ||= this.bytes;
const jbig2Image = new Jbig2Image();

const chunks = [];
Expand All @@ -57,7 +62,7 @@ class Jbig2Stream extends DecodeStream {
chunks.push({ data: globals, start: 0, end: globals.length });
}
}
chunks.push({ data: this.bytes, start: 0, end: this.bytes.length });
chunks.push({ data: bytes, start: 0, end: bytes.length });
const data = jbig2Image.parseChunks(chunks);
const dataLength = data.length;

Expand All @@ -68,6 +73,12 @@ class Jbig2Stream extends DecodeStream {
this.buffer = data;
this.bufferLength = dataLength;
this.eof = true;

return this.buffer;
}

get canAsyncDecodeImageFromBuffer() {
return this.stream.isAsync;
}
}

Expand Down
36 changes: 24 additions & 12 deletions src/core/jpeg_stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,6 @@ import { shadow } from "../shared/util.js";
*/
class JpegStream extends DecodeStream {
constructor(stream, maybeLength, params) {
// Some images may contain 'junk' before the SOI (start-of-image) marker.
// Note: this seems to mainly affect inline images.
let ch;
while ((ch = stream.getByte()) !== -1) {
// Find the first byte of the SOI marker (0xFFD8).
if (ch === 0xff) {
stream.skip(-1); // Reset the stream position to the SOI.
break;
}
}
super(maybeLength);

this.stream = stream;
Expand All @@ -53,8 +43,24 @@ class JpegStream extends DecodeStream {
}

readBlock() {
this.decodeImage();
}

decodeImage(bytes) {
if (this.eof) {
return;
return this.buffer;
}
bytes ||= this.bytes;

// Some images may contain 'junk' before the SOI (start-of-image) marker.
// Note: this seems to mainly affect inline images.
for (let i = 0, ii = bytes.length - 1; i < ii; i++) {
if (bytes[i] === 0xff && bytes[i + 1] === 0xd8) {
if (i > 0) {
bytes = bytes.subarray(i);
}
break;
}
}
const jpegOptions = {
decodeTransform: undefined,
Expand Down Expand Up @@ -89,7 +95,7 @@ class JpegStream extends DecodeStream {
}
const jpegImage = new JpegImage(jpegOptions);

jpegImage.parse(this.bytes);
jpegImage.parse(bytes);
const data = jpegImage.getData({
width: this.drawWidth,
height: this.drawHeight,
Expand All @@ -100,6 +106,12 @@ class JpegStream extends DecodeStream {
this.buffer = data;
this.bufferLength = data.length;
this.eof = true;

return this.buffer;
}

get canAsyncDecodeImageFromBuffer() {
return this.stream.isAsync;
}
}

Expand Down
16 changes: 13 additions & 3 deletions src/core/jpx_stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,23 @@ class JpxStream extends DecodeStream {
}

readBlock(ignoreColorSpace) {
this.decodeImage(null, ignoreColorSpace);
}

decodeImage(bytes, ignoreColorSpace) {
if (this.eof) {
return;
return this.buffer;
}

this.buffer = JpxImage.decode(this.bytes, ignoreColorSpace);
bytes ||= this.bytes;
this.buffer = JpxImage.decode(bytes, ignoreColorSpace);
this.bufferLength = this.buffer.length;
this.eof = true;

return this.buffer;
}

get canAsyncDecodeImageFromBuffer() {
return this.stream.isAsync;
}
}

Expand Down

0 comments on commit 9654ad5

Please sign in to comment.