Skip to content
This repository has been archived by the owner on Oct 29, 2020. It is now read-only.

Commit

Permalink
fix: prevent sharp-related memory leak
Browse files Browse the repository at this point in the history
Probably this: lovell/sharp#1803. Tried pngjs (not compatible enough), jimp (too slow), and finally back to canvas which we've used before. It is a native dependency but doesn't seem to have the particular pathological behavior that sharp does while loading blank pages repeatedly.
  • Loading branch information
eventualbuddha committed Oct 17, 2020
1 parent 8d6ba26 commit baabf57
Show file tree
Hide file tree
Showing 10 changed files with 556 additions and 70 deletions.
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
]
},
"dependencies": {
"@types/pngjs": "^3.4.2",
"@types/sharp": "^0.26.0",
"@votingworks/ballot-encoder": "^4.0.0",
"@votingworks/hmpb-interpreter": "^5.2.12",
Expand All @@ -41,6 +42,7 @@
"debug": "^4.1.1",
"express": "^4.16.4",
"fs-extra": "^8.0.1",
"jimp": "^0.16.1",
"memory-streams": "^0.1.3",
"multer": "^1.4.2",
"node-quirc": "^2.2.1",
Expand Down
15 changes: 5 additions & 10 deletions src/cli/render-pages.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ import main from './render-pages'
import { WritableStream } from 'memory-streams'
import { ballotPdf } from '../../test/fixtures/choctaw-2020-09-22-f30480cc99'
import { promises as fs } from 'fs'
import sharp from 'sharp'
import { join } from 'path'
import { pathExists } from 'fs-extra'
import { tmpNameSync } from 'tmp'
import { loadImageData } from '../util/images'

function fakeOutput(): WritableStream & NodeJS.WriteStream {
return new WritableStream() as WritableStream & NodeJS.WriteStream
Expand Down Expand Up @@ -54,17 +54,12 @@ test('generates one PNG image per PDF page', async () => {
stderr: '',
})

expect(await sharp(join(tmpDir, 'ballot-p1.png')).metadata()).toEqual(
expect.objectContaining({
for (const filename of ['ballot-p1.png', 'ballot-p2.png']) {
const { width, height } = await loadImageData(join(tmpDir, filename))
expect({ width, height }).toEqual({
width: 1224,
height: 1584,
})
)
expect(await sharp(join(tmpDir, 'ballot-p2.png')).metadata()).toEqual(
expect.objectContaining({
width: 1224,
height: 1584,
})
)
}
expect(await pathExists(join(tmpDir, 'ballot-p3.png'))).toBe(false)
})
14 changes: 3 additions & 11 deletions src/cli/render-pages.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { promises as fs } from 'fs'
import type { Channels } from 'sharp'
import { join, dirname, extname, basename } from 'path'
import chalk from 'chalk'
import { writeFile } from 'fs-extra'

export function printHelp(out: typeof process.stdout): void {
out.write(`${chalk.bold('render-pages')} PDF ${chalk.italic('[PDF …]')}\n`)
Expand Down Expand Up @@ -43,7 +43,7 @@ export default async function main(

// Defer loading the heavy dependencies until we're sure we need them.
const { default: pdfToImages } = await import('../util/pdfToImages')
const { default: sharp } = await import('sharp')
const { toPNG } = await import('../util/images')

for (const pdfPath of pdfPaths) {
const pdfDir = dirname(pdfPath)
Expand All @@ -54,15 +54,7 @@ export default async function main(
for await (const { page, pageNumber } of pdfToImages(pdf, { scale: 2 })) {
const pngPath = join(pdfDir, `${pdfBase}-p${pageNumber}.png`)
stdout.write(`📝 ${pngPath}\n`)
await sharp(Buffer.from(page.data), {
raw: {
channels: (page.data.length / page.width / page.height) as Channels,
width: page.width,
height: page.height,
},
})
.png()
.toFile(pngPath)
await writeFile(pngPath, await toPNG(page))
}
}

Expand Down
17 changes: 2 additions & 15 deletions src/importer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import * as fsExtra from 'fs-extra'
import * as streams from 'memory-streams'
import { join } from 'path'
import { sync as rimraf } from 'rimraf'
import sharp, { Raw } from 'sharp'
import { v4 as uuid } from 'uuid'
import { PageInterpretation } from './interpreter'
import { Scanner } from './scanner'
Expand All @@ -17,6 +16,7 @@ import {
SheetOf,
Side,
} from './types'
import { toPNG } from './util/images'
import pdfToImages from './util/pdfToImages'
import { call, Input, InterpretOutput, Output } from './workers/interpret'
import { inlinePool, WorkerPool } from './workers/pool'
Expand Down Expand Up @@ -72,20 +72,7 @@ export async function saveImages(

if (normalizedImage) {
debug('about to write normalized ballot image to %s', normalizedImagePath)
await fsExtra.writeFile(
normalizedImagePath,
await sharp(Buffer.from(normalizedImage.data.buffer), {
raw: {
width: normalizedImage.width,
height: normalizedImage.height,
channels: (normalizedImage.data.length /
(normalizedImage.width *
normalizedImage.height)) as Raw['channels'],
},
})
.png()
.toBuffer()
)
await fsExtra.writeFile(normalizedImagePath, await toPNG(normalizedImage))
debug('wrote normalized ballot image to %s', normalizedImagePath)
return { original: originalImagePath, normalized: normalizedImagePath }
}
Expand Down
9 changes: 2 additions & 7 deletions src/interpreter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ import {
Size,
} from '@votingworks/hmpb-interpreter'
import makeDebug from 'debug'
import sharp from 'sharp'
import { BallotMetadata, isErrorResult, Result, SheetOf } from './types'
import { AdjudicationInfo } from './types/ballot-review'
import ballotAdjudicationReasons, {
adjudicationReasonDescription,
} from './util/ballotAdjudicationReasons'
import { loadImageData } from './util/images'
import optionMarkStatus from './util/optionMarkStatus'
import { time } from './util/perf'
import { detectQRCode } from './util/qrcode'
Expand Down Expand Up @@ -117,12 +117,7 @@ export async function getBallotImageData(
file: Buffer,
filename: string
): Promise<Result<PageInterpretation, BallotImageData>> {
const img = sharp(file).raw().ensureAlpha()
const {
data,
info: { width, height },
} = await img.toBuffer({ resolveWithObject: true })

const { data, width, height } = await loadImageData(file)
const imageThreshold = threshold(data)
if (
imageThreshold.foreground.ratio < MAXIMUM_BLANK_PAGE_FOREGROUND_PIXEL_RATIO
Expand Down
10 changes: 5 additions & 5 deletions src/store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ import { createHash } from 'crypto'
import makeDebug from 'debug'
import { promises as fs } from 'fs'
import { join } from 'path'
import sharp from 'sharp'
import * as sqlite3 from 'sqlite3'
import { Writable } from 'stream'
import { inspect } from 'util'
import { v4 as uuid } from 'uuid'
import { buildCastVoteRecord } from './buildCastVoteRecord'
import {
Expand All @@ -32,13 +32,13 @@ import {
AdjudicationStatus,
BallotMetadata,
BatchInfo,
ElectionDefinition,
getMarkStatus,
isMarginalMark,
PageInterpretationWithFiles,
SerializableBallotPageLayout,
SheetOf,
Side,
ElectionDefinition,
} from './types'
import {
AdjudicationInfo,
Expand All @@ -47,11 +47,11 @@ import {
MarksByContestId,
ReviewBallot,
} from './types/ballot-review'
import { BallotSheetInfo } from './util/ballotAdjudicationReasons'
import allContestOptions from './util/allContestOptions'
import { BallotSheetInfo } from './util/ballotAdjudicationReasons'
import getBallotPageContests from './util/getBallotPageContests'
import { loadImageData } from './util/images'
import { changesFromMarks, mergeChanges } from './util/marks'
import { inspect } from 'util'

const debug = makeDebug('module-scan:store')

Expand Down Expand Up @@ -614,7 +614,7 @@ export default class Store {
return marks.ballotSize
}

const { width, height } = await sharp(row.normalizedFilename).metadata()
const { width, height } = await loadImageData(row.normalizedFilename)

if (!width || !height) {
throw new Error(
Expand Down
24 changes: 24 additions & 0 deletions src/util/images.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import { createCanvas, createImageData, loadImage } from 'canvas'

function ensureImageData(imageData: ImageData): ImageData {
return createImageData(imageData.data, imageData.width, imageData.height)
}

export async function loadImageData(path: string): Promise<ImageData>
export async function loadImageData(data: Buffer): Promise<ImageData>
export async function loadImageData(
pathOrData: string | Buffer
): Promise<ImageData> {
const img = await loadImage(pathOrData)
const canvas = createCanvas(img.width, img.height)
const context = canvas.getContext('2d')
context.drawImage(img, 0, 0)
return context.getImageData(0, 0, img.width, img.height)
}

export async function toPNG(imageData: ImageData): Promise<Buffer> {
const canvas = createCanvas(imageData.width, imageData.height)
const context = canvas.getContext('2d')
context.putImageData(ensureImageData(imageData), 0, 0)
return canvas.toBuffer()
}
15 changes: 2 additions & 13 deletions src/util/qrcode.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import crop from '@votingworks/hmpb-interpreter/dist/src/utils/crop'
import { detect as qrdetect } from '@votingworks/qrdetect'
import makeDebug from 'debug'
import { decode as quircDecode } from 'node-quirc'
import sharp, { Channels } from 'sharp'
import { toPNG } from './images'
import { time } from './perf'

const debug = makeDebug('module-scan:qrcode')
Expand Down Expand Up @@ -118,18 +118,7 @@ export const detectQRCode = async (
const cropped = crop(imageData, bounds)

debug('generating PNG for quirc')
const { data, width, height } = cropped
const img = await sharp(Buffer.from(data), {
raw: {
channels: (data.length / width / height) as Channels,
width,
height,
},
})
.raw()
.ensureAlpha()
.png()
.toBuffer()
const img = await toPNG(cropped)

debug('scanning with quirc')
const results = (await quircDecode(img))[0]
Expand Down
12 changes: 6 additions & 6 deletions test/util/mocks.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import { ChildProcess } from 'child_process'
import { EventEmitter } from 'events'
import sharp from 'sharp'
import { writeFile } from 'fs-extra'
import { Readable, Writable } from 'stream'
import { fileSync } from 'tmp'
import { Importer } from '../../src/importer'
import { Scanner } from '../../src/scanner'
import { SheetOf } from '../../src/types'
import { toPNG } from '../../src/util/images'
import { inlinePool, WorkerOps, WorkerPool } from '../../src/workers/pool'

export function mockWorkerPoolProvider<I, O>(
Expand Down Expand Up @@ -269,10 +270,9 @@ export function makeMockChildProcess(): MockChildProcess {

export async function makeImageFile(): Promise<string> {
const imageFile = fileSync()
await sharp({
create: { width: 1, height: 1, channels: 3, background: '#000' },
})
.png()
.toFile(imageFile.name)
await writeFile(
imageFile.name,
await toPNG({ data: Uint8ClampedArray.of(0, 0, 0), width: 1, height: 1 })
)
return imageFile.name
}
Loading

0 comments on commit baabf57

Please sign in to comment.