diff --git a/src/core/base_stream.js b/src/core/base_stream.js index f10d266ecd8be..0dd8553768dec 100644 --- a/src/core/base_stream.js +++ b/src/core/base_stream.js @@ -44,6 +44,27 @@ class BaseStream { unreachable("Abstract method `getBytes` called"); } + /** + * NOTE: This method can only be used to get image-data that is guaranteed + * to be fully loaded, since otherwise intermittent errors may occur; + * note the `ObjectLoader` class. + */ + async getImageData(length, ignoreColorSpace) { + return this.getBytes(length, ignoreColorSpace); + } + + async asyncGetBytes() { + unreachable("Abstract method `asyncGetBytes` called"); + } + + get isAsync() { + return false; + } + + get canAsyncDecodeImageFromBuffer() { + return false; + } + peekByte() { const peekedByte = this.getByte(); if (peekedByte !== -1) { diff --git a/src/core/decode_stream.js b/src/core/decode_stream.js index f2208d726ab6d..4f0bde5820c35 100644 --- a/src/core/decode_stream.js +++ b/src/core/decode_stream.js @@ -99,6 +99,14 @@ class DecodeStream extends BaseStream { return this.buffer.subarray(pos, end); } + async getImageData(length, ignoreColorSpace = false) { + if (!this.canAsyncDecodeImageFromBuffer) { + return this.getBytes(length, ignoreColorSpace); + } + const data = await this.stream.asyncGetBytes(); + return this.decodeImage(data, ignoreColorSpace); + } + reset() { this.pos = 0; } diff --git a/src/core/flate_stream.js b/src/core/flate_stream.js index f2df7a17c98d8..06ceefecb979c 100644 --- a/src/core/flate_stream.js +++ b/src/core/flate_stream.js @@ -21,6 +21,7 @@ import { FormatError, info } from "../shared/util.js"; import { DecodeStream } from "./decode_stream.js"; +import { Stream } from "./stream.js"; const codeLenCodeMap = new Int32Array([ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15, @@ -148,6 +149,57 @@ class FlateStream extends DecodeStream { this.codeBuf = 0; } + async getImageData(length, _ignoreColorSpace) { + const data = await this.asyncGetBytes(); + return data?.subarray(0, length) || this.getBytes(length); + } + + async asyncGetBytes() { + this.str.reset(); + const bytes = this.str.getBytes(); + + try { + const { readable, writable } = new DecompressionStream("deflate"); + const writer = writable.getWriter(); + writer.write(bytes); + writer.close(); + + const chunks = []; + let totalLength = 0; + + for await (const chunk of readable) { + chunks.push(chunk); + totalLength += chunk.byteLength; + } + const data = new Uint8Array(totalLength); + let offset = 0; + for (const chunk of chunks) { + data.set(chunk, offset); + offset += chunk.byteLength; + } + + return data; + } catch { + // DecompressionStream failed (for example because there are some extra + // bytes after the end of the compressed data), so we fallback to our + // decoder. + // We already get the bytes from the underlying stream, so we just reuse + // them to avoid get them again. + this.str = new Stream( + bytes, + 2 /* = header size (see ctor) */, + bytes.length, + this.str.dict + ); + this.reset(); + return null; + } + } + + get isAsync() { + return true; + } + getBits(bits) { const str = this.str; let codeSize = this.codeSize; diff --git a/src/core/image.js b/src/core/image.js index 7a9eb098fdafe..ca3b73b0bce83 100644 --- a/src/core/image.js +++ b/src/core/image.js @@ -565,7 +565,7 @@ class PDFImage { return output; } - fillOpacity(rgbaBuf, width, height, actualHeight, image) { + async fillOpacity(rgbaBuf, width, height, actualHeight, image) { if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { assert( rgbaBuf instanceof Uint8ClampedArray, @@ -580,7 +580,7 @@ class PDFImage { sw = smask.width; sh = smask.height; alphaBuf = new Uint8ClampedArray(sw * sh); - smask.fillGrayBuffer(alphaBuf); + await smask.fillGrayBuffer(alphaBuf); if (sw !== width || sh !== height) { alphaBuf = resizeImageMask(alphaBuf, smask.bpc, sw, sh, width, height); } @@ -590,7 +590,7 @@ class PDFImage { sh = mask.height; alphaBuf = new Uint8ClampedArray(sw * sh); mask.numComps = 1; - mask.fillGrayBuffer(alphaBuf); + await mask.fillGrayBuffer(alphaBuf); // Need to invert values in rgbaBuf for (i = 0, ii = sw * sh; i < ii; ++i) { @@ -716,7 +716,7 @@ class PDFImage { drawWidth === originalWidth && drawHeight === originalHeight ) { - const data = this.getImageBytes(originalHeight * rowBytes, {}); + const data = await this.getImageBytes(originalHeight * rowBytes, {}); if (isOffscreenCanvasSupported) { if (mustBeResized) { return ImageResizer.createImage( @@ -774,7 +774,7 @@ class PDFImage { } if (isHandled) { - const rgba = this.getImageBytes(imageLength, { + const rgba = await this.getImageBytes(imageLength, { drawWidth, drawHeight, forceRGBA: true, @@ -794,7 +794,7 @@ class PDFImage { case "DeviceRGB": case "DeviceCMYK": imgData.kind = ImageKind.RGB_24BPP; - imgData.data = this.getImageBytes(imageLength, { + imgData.data = await this.getImageBytes(imageLength, { drawWidth, drawHeight, forceRGB: true, @@ -809,7 +809,7 @@ class PDFImage { } } - const imgArray = this.getImageBytes(originalHeight * rowBytes, { + const imgArray = await this.getImageBytes(originalHeight * rowBytes, { internal: true, }); // imgArray can be incomplete (e.g. after CCITT fax encoding). @@ -852,7 +852,7 @@ class PDFImage { maybeUndoPreblend = true; // Color key masking (opacity) must be performed before decoding. - this.fillOpacity(data, drawWidth, drawHeight, actualHeight, comps); + await this.fillOpacity(data, drawWidth, drawHeight, actualHeight, comps); } if (this.needsDecode) { @@ -893,7 +893,7 @@ class PDFImage { return imgData; } - fillGrayBuffer(buffer) { + async fillGrayBuffer(buffer) { if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) { assert( buffer instanceof Uint8ClampedArray, @@ -913,7 +913,9 @@ class PDFImage { // rows start at byte boundary const rowBytes = (width * numComps * bpc + 7) >> 3; - const imgArray = this.getImageBytes(height * rowBytes, { internal: true }); + const imgArray = await this.getImageBytes(height * rowBytes, { + internal: true, + }); const comps = this.getComponents(imgArray); let i, length; @@ -975,7 +977,7 @@ class PDFImage { }; } - getImageBytes( + async getImageBytes( length, { drawWidth, @@ -990,7 +992,10 @@ class PDFImage { this.image.drawHeight = drawHeight || this.height; this.image.forceRGBA = !!forceRGBA; this.image.forceRGB = !!forceRGB; - const imageBytes = this.image.getBytes(length, this.ignoreColorSpace); + const imageBytes = await this.image.getImageData( + length, + this.ignoreColorSpace + ); // If imageBytes came from a DecodeStream, we're safe to transfer it // (and thus detach its underlying buffer) because it will constitute diff --git a/src/core/jbig2_stream.js b/src/core/jbig2_stream.js index bbea4c53971dc..126669c811c45 100644 --- a/src/core/jbig2_stream.js +++ b/src/core/jbig2_stream.js @@ -44,9 +44,14 @@ class Jbig2Stream extends DecodeStream { } readBlock() { + this.decodeImage(); + } + + decodeImage(bytes) { if (this.eof) { - return; + return this.buffer; } + bytes ||= this.bytes; const jbig2Image = new Jbig2Image(); const chunks = []; @@ -57,7 +62,7 @@ class Jbig2Stream extends DecodeStream { chunks.push({ data: globals, start: 0, end: globals.length }); } } - chunks.push({ data: this.bytes, start: 0, end: this.bytes.length }); + chunks.push({ data: bytes, start: 0, end: bytes.length }); const data = jbig2Image.parseChunks(chunks); const dataLength = data.length; @@ -68,6 +73,12 @@ class Jbig2Stream extends DecodeStream { this.buffer = data; this.bufferLength = dataLength; this.eof = true; + + return this.buffer; + } + + get canAsyncDecodeImageFromBuffer() { + return this.stream.isAsync; } } diff --git a/src/core/jpeg_stream.js b/src/core/jpeg_stream.js index fcfe3df17ad86..32cacae59b889 100644 --- a/src/core/jpeg_stream.js +++ b/src/core/jpeg_stream.js @@ -24,16 +24,6 @@ import { shadow } from "../shared/util.js"; */ class JpegStream extends DecodeStream { constructor(stream, maybeLength, params) { - // Some images may contain 'junk' before the SOI (start-of-image) marker. - // Note: this seems to mainly affect inline images. - let ch; - while ((ch = stream.getByte()) !== -1) { - // Find the first byte of the SOI marker (0xFFD8). - if (ch === 0xff) { - stream.skip(-1); // Reset the stream position to the SOI. - break; - } - } super(maybeLength); this.stream = stream; @@ -53,8 +43,24 @@ class JpegStream extends DecodeStream { } readBlock() { + this.decodeImage(); + } + + decodeImage(bytes) { if (this.eof) { - return; + return this.buffer; + } + bytes ||= this.bytes; + + // Some images may contain 'junk' before the SOI (start-of-image) marker. + // Note: this seems to mainly affect inline images. + for (let i = 0, ii = bytes.length - 1; i < ii; i++) { + if (bytes[i] === 0xff && bytes[i + 1] === 0xd8) { + if (i > 0) { + bytes = bytes.subarray(i); + } + break; + } } const jpegOptions = { decodeTransform: undefined, @@ -89,7 +95,7 @@ class JpegStream extends DecodeStream { } const jpegImage = new JpegImage(jpegOptions); - jpegImage.parse(this.bytes); + jpegImage.parse(bytes); const data = jpegImage.getData({ width: this.drawWidth, height: this.drawHeight, @@ -100,6 +106,12 @@ class JpegStream extends DecodeStream { this.buffer = data; this.bufferLength = data.length; this.eof = true; + + return this.buffer; + } + + get canAsyncDecodeImageFromBuffer() { + return this.stream.isAsync; } } diff --git a/src/core/jpx_stream.js b/src/core/jpx_stream.js index 7f8d1d520626d..9da001f99f0a1 100644 --- a/src/core/jpx_stream.js +++ b/src/core/jpx_stream.js @@ -42,13 +42,23 @@ class JpxStream extends DecodeStream { } readBlock(ignoreColorSpace) { + this.decodeImage(null, ignoreColorSpace); + } + + decodeImage(bytes, ignoreColorSpace) { if (this.eof) { - return; + return this.buffer; } - - this.buffer = JpxImage.decode(this.bytes, ignoreColorSpace); + bytes ||= this.bytes; + this.buffer = JpxImage.decode(bytes, ignoreColorSpace); this.bufferLength = this.buffer.length; this.eof = true; + + return this.buffer; + } + + get canAsyncDecodeImageFromBuffer() { + return this.stream.isAsync; } }