From b1531926d38d275dbddd0299890677fecdb4bdf8 Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Fri, 24 May 2024 23:26:02 +0200 Subject: [PATCH] Decompress images with DecompressionStream when possible Getting images is already asynchronous, so we can take this opportunity to use DecompressionStream (which is asynchronous too) to decompress images. --- src/core/base_stream.js | 16 ++++++++++++++ src/core/decode_stream.js | 8 +++++++ src/core/flate_stream.js | 45 +++++++++++++++++++++++++++++++++++++++ src/core/image.js | 29 ++++++++++++++----------- src/core/jbig2_stream.js | 17 +++++++++++++-- src/core/jpeg_stream.js | 36 ++++++++++++++++++++----------- src/core/jpx_stream.js | 18 +++++++++++++--- 7 files changed, 140 insertions(+), 29 deletions(-) diff --git a/src/core/base_stream.js b/src/core/base_stream.js index f10d266ecd8be8..132bb478c24d84 100644 --- a/src/core/base_stream.js +++ b/src/core/base_stream.js @@ -44,6 +44,22 @@ class BaseStream { unreachable("Abstract method `getBytes` called"); } + async getImageData(length, ignoreColorSpace) { + return this.getBytes(length, ignoreColorSpace); + } + + async asyncGetBytes() { + unreachable("Abstract method `asyncGetBytes` called"); + } + + get isAsync() { + return false; + } + + get canAsyncDecodeImageFromBuffer() { + return false; + } + peekByte() { const peekedByte = this.getByte(); if (peekedByte !== -1) { diff --git a/src/core/decode_stream.js b/src/core/decode_stream.js index f2208d726ab6d9..4f0bde5820c358 100644 --- a/src/core/decode_stream.js +++ b/src/core/decode_stream.js @@ -99,6 +99,14 @@ class DecodeStream extends BaseStream { return this.buffer.subarray(pos, end); } + async getImageData(length, ignoreColorSpace = false) { + if (!this.canAsyncDecodeImageFromBuffer) { + return this.getBytes(length, ignoreColorSpace); + } + const data = await this.stream.asyncGetBytes(); + return this.decodeImage(data, ignoreColorSpace); + } + reset() { this.pos = 0; } diff --git a/src/core/flate_stream.js 
b/src/core/flate_stream.js index f2df7a17c98d8d..8b5208d0e39e2c 100644 --- a/src/core/flate_stream.js +++ b/src/core/flate_stream.js @@ -21,6 +21,7 @@ import { FormatError, info } from "../shared/util.js"; import { DecodeStream } from "./decode_stream.js"; +import { Stream } from "./stream.js"; const codeLenCodeMap = new Int32Array([ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15, @@ -148,6 +149,50 @@ class FlateStream extends DecodeStream { this.codeBuf = 0; } + get isAsync() { + return true; + } + + async getImageData(length, _ignoreColorSpace) { + const data = await this.asyncGetBytes(); + if (!data) { + return this.getBytes(length); + } + return data.subarray(0, length); + } + + async asyncGetBytes() { + this.str.reset(); + const bytes = this.str.getBytes(); + + try { + const { readable, writable } = new DecompressionStream("deflate"); + const writer = writable.getWriter(); + writer.write(bytes); + writer.close(); + + const chunks = []; + let totalLength = 0; + + for await (const chunk of readable) { + chunks.push(chunk); + totalLength += chunk.byteLength; + } + const data = new Uint8Array(totalLength); + let offset = 0; + for (const chunk of chunks) { + data.set(chunk, offset); + offset += chunk.byteLength; + } + + return data; + } catch { + this.str = new Stream(bytes, 2, bytes.length, this.str.dict); + this.reset(); + return null; + } + } + getBits(bits) { const str = this.str; let codeSize = this.codeSize; diff --git a/src/core/image.js b/src/core/image.js index 7a9eb098fdafe0..ca3b73b0bce83e 100644 --- a/src/core/image.js +++ b/src/core/image.js @@ -565,7 +565,7 @@ class PDFImage { return output; } - fillOpacity(rgbaBuf, width, height, actualHeight, image) { + async fillOpacity(rgbaBuf, width, height, actualHeight, image) { if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { assert( rgbaBuf instanceof Uint8ClampedArray, @@ -580,7 +580,7 @@ class PDFImage { sw = smask.width; sh = smask.height; alphaBuf = new 
Uint8ClampedArray(sw * sh); - smask.fillGrayBuffer(alphaBuf); + await smask.fillGrayBuffer(alphaBuf); if (sw !== width || sh !== height) { alphaBuf = resizeImageMask(alphaBuf, smask.bpc, sw, sh, width, height); } @@ -590,7 +590,7 @@ class PDFImage { sh = mask.height; alphaBuf = new Uint8ClampedArray(sw * sh); mask.numComps = 1; - mask.fillGrayBuffer(alphaBuf); + await mask.fillGrayBuffer(alphaBuf); // Need to invert values in rgbaBuf for (i = 0, ii = sw * sh; i < ii; ++i) { @@ -716,7 +716,7 @@ class PDFImage { drawWidth === originalWidth && drawHeight === originalHeight ) { - const data = this.getImageBytes(originalHeight * rowBytes, {}); + const data = await this.getImageBytes(originalHeight * rowBytes, {}); if (isOffscreenCanvasSupported) { if (mustBeResized) { return ImageResizer.createImage( @@ -774,7 +774,7 @@ class PDFImage { } if (isHandled) { - const rgba = this.getImageBytes(imageLength, { + const rgba = await this.getImageBytes(imageLength, { drawWidth, drawHeight, forceRGBA: true, @@ -794,7 +794,7 @@ class PDFImage { case "DeviceRGB": case "DeviceCMYK": imgData.kind = ImageKind.RGB_24BPP; - imgData.data = this.getImageBytes(imageLength, { + imgData.data = await this.getImageBytes(imageLength, { drawWidth, drawHeight, forceRGB: true, @@ -809,7 +809,7 @@ class PDFImage { } } - const imgArray = this.getImageBytes(originalHeight * rowBytes, { + const imgArray = await this.getImageBytes(originalHeight * rowBytes, { internal: true, }); // imgArray can be incomplete (e.g. after CCITT fax encoding). @@ -852,7 +852,7 @@ class PDFImage { maybeUndoPreblend = true; // Color key masking (opacity) must be performed before decoding. 
- this.fillOpacity(data, drawWidth, drawHeight, actualHeight, comps); + await this.fillOpacity(data, drawWidth, drawHeight, actualHeight, comps); } if (this.needsDecode) { @@ -893,7 +893,7 @@ class PDFImage { return imgData; } - fillGrayBuffer(buffer) { + async fillGrayBuffer(buffer) { if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { assert( buffer instanceof Uint8ClampedArray, @@ -913,7 +913,9 @@ class PDFImage { // rows start at byte boundary const rowBytes = (width * numComps * bpc + 7) >> 3; - const imgArray = this.getImageBytes(height * rowBytes, { internal: true }); + const imgArray = await this.getImageBytes(height * rowBytes, { + internal: true, + }); const comps = this.getComponents(imgArray); let i, length; @@ -975,7 +977,7 @@ class PDFImage { }; } - getImageBytes( + async getImageBytes( length, { drawWidth, @@ -990,7 +992,10 @@ class PDFImage { this.image.drawHeight = drawHeight || this.height; this.image.forceRGBA = !!forceRGBA; this.image.forceRGB = !!forceRGB; - const imageBytes = this.image.getBytes(length, this.ignoreColorSpace); + const imageBytes = await this.image.getImageData( + length, + this.ignoreColorSpace + ); // If imageBytes came from a DecodeStream, we're safe to transfer it // (and thus detach its underlying buffer) because it will constitute diff --git a/src/core/jbig2_stream.js b/src/core/jbig2_stream.js index bbea4c53971dcb..4f3f0ba3ab495d 100644 --- a/src/core/jbig2_stream.js +++ b/src/core/jbig2_stream.js @@ -44,8 +44,15 @@ class Jbig2Stream extends DecodeStream { } readBlock() { + return this.decodeImage(); + } + + decodeImage(bytes) { if (this.eof) { - return; + return this.buffer; + } + if (!bytes) { + bytes = this.bytes; } const jbig2Image = new Jbig2Image(); @@ -57,7 +64,7 @@ class Jbig2Stream extends DecodeStream { chunks.push({ data: globals, start: 0, end: globals.length }); } } - chunks.push({ data: this.bytes, start: 0, end: this.bytes.length }); + chunks.push({ data: bytes, start: 0, end: bytes.length 
}); const data = jbig2Image.parseChunks(chunks); const dataLength = data.length; @@ -68,6 +75,12 @@ class Jbig2Stream extends DecodeStream { this.buffer = data; this.bufferLength = dataLength; this.eof = true; + + return this.buffer; + } + + get canAsyncDecodeImageFromBuffer() { + return this.stream.isAsync; } } diff --git a/src/core/jpeg_stream.js b/src/core/jpeg_stream.js index fcfe3df17ad864..c00432f873e456 100644 --- a/src/core/jpeg_stream.js +++ b/src/core/jpeg_stream.js @@ -24,16 +24,6 @@ import { shadow } from "../shared/util.js"; */ class JpegStream extends DecodeStream { constructor(stream, maybeLength, params) { - // Some images may contain 'junk' before the SOI (start-of-image) marker. - // Note: this seems to mainly affect inline images. - let ch; - while ((ch = stream.getByte()) !== -1) { - // Find the first byte of the SOI marker (0xFFD8). - if (ch === 0xff) { - stream.skip(-1); // Reset the stream position to the SOI. - break; - } - } super(maybeLength); this.stream = stream; @@ -53,8 +43,24 @@ class JpegStream extends DecodeStream { } readBlock() { + return this.decodeImage(); + } + + decodeImage(bytes) { if (this.eof) { - return; + return this.buffer; + } + if (!bytes) { + bytes = this.bytes; + } + + // Some images may contain 'junk' before the SOI (start-of-image) marker. + // Note: this seems to mainly affect inline images. 
+ for (let i = 0, ii = bytes.length - 1; i < ii; i++) { + if (bytes[i] === 0xff && bytes[i + 1] === 0xd8) { + bytes = bytes.subarray(i); + break; + } } const jpegOptions = { decodeTransform: undefined, @@ -89,7 +95,7 @@ class JpegStream extends DecodeStream { } const jpegImage = new JpegImage(jpegOptions); - jpegImage.parse(this.bytes); + jpegImage.parse(bytes); const data = jpegImage.getData({ width: this.drawWidth, height: this.drawHeight, @@ -100,6 +106,12 @@ class JpegStream extends DecodeStream { this.buffer = data; this.bufferLength = data.length; this.eof = true; + + return this.buffer; + } + + get canAsyncDecodeImageFromBuffer() { + return this.stream.isAsync; } } diff --git a/src/core/jpx_stream.js b/src/core/jpx_stream.js index 7f8d1d520626d1..e73ca42ac91b1f 100644 --- a/src/core/jpx_stream.js +++ b/src/core/jpx_stream.js @@ -42,13 +42,25 @@ class JpxStream extends DecodeStream { } readBlock(ignoreColorSpace) { + return this.decodeImage(null, ignoreColorSpace); + } + + decodeImage(bytes, ignoreColorSpace) { if (this.eof) { - return; + return this.buffer; } - - this.buffer = JpxImage.decode(this.bytes, ignoreColorSpace); + if (!bytes) { + bytes = this.bytes; + } + this.buffer = JpxImage.decode(bytes, ignoreColorSpace); this.bufferLength = this.buffer.length; this.eof = true; + + return this.buffer; + } + + get canAsyncDecodeImageFromBuffer() { + return this.stream.isAsync; } }