From 722c0faa387ae6e35886f08eefb238c03ae85db1 Mon Sep 17 00:00:00 2001 From: Gar Date: Thu, 2 May 2024 13:42:18 -0700 Subject: [PATCH] fix: limit packument cache size based on heap size This adds a new packument cache that is an instance of `lru-cache`. It uses that package's ability to limit content based on size, and has some multipliers based on research to mostly correctly approximate the correlation between packument size and its memory usage. It also limits the total size of the cache based on the actual heap available. Closes: https://github.com/npm/cli/issues/7276 Related: https://github.com/npm/pacote/pull/369 --- DEPENDENCIES.md | 1 + smoke-tests/test/fixtures/setup.js | 22 ++++-- smoke-tests/test/large-install.js | 14 +++- smoke-tests/test/npm-replace-global.js | 2 +- test/lib/commands/audit.js | 1 + workspaces/arborist/lib/arborist/index.js | 4 +- workspaces/arborist/lib/node.js | 2 + workspaces/arborist/lib/packument-cache.js | 77 ++++++++++++++++++++ workspaces/arborist/test/arborist/rebuild.js | 2 +- workspaces/arborist/test/audit-report.js | 2 +- 10 files changed, 113 insertions(+), 14 deletions(-) create mode 100644 workspaces/arborist/lib/packument-cache.js diff --git a/DEPENDENCIES.md b/DEPENDENCIES.md index 89b2792ac512a..9cfb7eeedd687 100644 --- a/DEPENDENCIES.md +++ b/DEPENDENCIES.md @@ -582,6 +582,7 @@ graph LR; npmcli-arborist-->isaacs-string-locale-compare["@isaacs/string-locale-compare"]; npmcli-arborist-->json-parse-even-better-errors; npmcli-arborist-->json-stringify-nice; + npmcli-arborist-->lru-cache; npmcli-arborist-->minify-registry-metadata; npmcli-arborist-->minimatch; npmcli-arborist-->nock; diff --git a/smoke-tests/test/fixtures/setup.js b/smoke-tests/test/fixtures/setup.js index d2500507119a8..91e0ddec2415f 100644 --- a/smoke-tests/test/fixtures/setup.js +++ b/smoke-tests/test/fixtures/setup.js @@ -74,7 +74,7 @@ const getCleanPaths = async () => { } module.exports = async (t, { testdir = {}, debug, mockRegistry = true, useProxy = false } = {}) => { - const debugLog = debug || CI ? (...a) => console.error(...a) : () => {} + const debugLog = debug || CI ? (...a) => t.comment(...a) : () => {} const cleanPaths = await getCleanPaths() // setup fixtures @@ -158,7 +158,7 @@ module.exports = async (t, { testdir = {}, debug, mockRegistry = true, useProxy log(`${spawnCmd} ${spawnArgs.join(' ')}`) log('-'.repeat(40)) - const { stderr, stdout } = await spawn(spawnCmd, spawnArgs, { + const p = spawn(spawnCmd, spawnArgs, { cwd, env: { ...getEnvPath(), @@ -169,10 +169,20 @@ module.exports = async (t, { testdir = {}, debug, mockRegistry = true, useProxy ...opts, }) - log(stderr) - log('-'.repeat(40)) - log(stdout) - log('='.repeat(40)) + // In debug mode, stream stdout and stderr to console so we can debug hanging processes + if (debug) { + p.process.stdout.on('data', (c) => log('STDOUT: ' + c.toString().trim())) + p.process.stderr.on('data', (c) => log('STDERR: ' + c.toString().trim())) + } + + const { stdout, stderr } = await p + // If not in debug mode, print full stderr and stdout contents separately + if (!debug) { + log(stderr) + log('-'.repeat(40)) + log(stdout) + log('='.repeat(40)) + } return { stderr, stdout } } diff --git a/smoke-tests/test/large-install.js b/smoke-tests/test/large-install.js index a44579904e6fd..a9260c8d3f0f1 100644 --- a/smoke-tests/test/large-install.js +++ b/smoke-tests/test/large-install.js @@ -5,18 +5,20 @@ const setup = require('./fixtures/setup.js') const getFixture = (p) => require( path.resolve(__dirname, './fixtures/large-install', p)) -const runTest = async (t) => { +const runTest = async (t, { lowMemory } = {}) => { const { npm } = await setup(t, { // This test relies on the actual production registry mockRegistry: false, testdir: { project: { 'package.json': getFixture('package.json'), - 'package-lock.json': getFixture('package-lock.json'), + ...lowMemory ? {} : { 'package-lock.json': getFixture('package-lock.json') }, }, }, }) - return npm('install', '--no-audit', '--no-fund') + return npm('install', '--no-audit', '--no-fund', { + env: lowMemory ? { NODE_OPTIONS: '--max-old-space-size=500' } : {}, + }) } // This test was originally created for https://github.com/npm/cli/issues/6763 @@ -31,3 +33,9 @@ t.test('large install', async t => { // installs the same number of packages. t.match(stdout, `added 126${process.platform === 'linux' ? 4 : 5} packages in`) }) + +t.test('large install, no lock and low memory', async t => { + // Run the same install but with no lockfile and constrained max-old-space-size + const { stdout } = await runTest(t, { lowMemory: true }) + t.match(stdout, /added \d+ packages/) +}) diff --git a/smoke-tests/test/npm-replace-global.js b/smoke-tests/test/npm-replace-global.js index 03a976ab6ccf3..12364e6899d9f 100644 --- a/smoke-tests/test/npm-replace-global.js +++ b/smoke-tests/test/npm-replace-global.js @@ -125,7 +125,7 @@ t.test('publish and replace global self', async t => { await npmPackage({ manifest: { packuments: [publishedPackument] }, tarballs: { [version]: tarball }, - times: 2, + times: 3, }) await fs.rm(cache, { recursive: true, force: true }) await useNpm('install', 'npm@latest', '--global') diff --git a/test/lib/commands/audit.js b/test/lib/commands/audit.js index 701d374ade985..d8714cb61912a 100644 --- a/test/lib/commands/audit.js +++ b/test/lib/commands/audit.js @@ -184,6 +184,7 @@ t.test('audit fix - bulk endpoint', async t => { tarballs: { '1.0.1': path.join(npm.prefix, 'test-dep-a-fixed'), }, + times: 2, }) const advisory = registry.advisory({ id: 100, vulnerable_versions: '1.0.0' }) registry.nock.post('/-/npm/v1/security/advisories/bulk', body => { diff --git a/workspaces/arborist/lib/arborist/index.js b/workspaces/arborist/lib/arborist/index.js index ba180f354708a..b348d490def65 100644 --- a/workspaces/arborist/lib/arborist/index.js +++ b/workspaces/arborist/lib/arborist/index.js @@ -31,10 +31,10 @@ const { homedir } = require('os') const { depth } = require('treeverse') const mapWorkspaces = require('@npmcli/map-workspaces') const { log, time } = require('proc-log') - const { saveTypeMap } = require('../add-rm-pkg-deps.js') const AuditReport = require('../audit-report.js') const relpath = require('../relpath.js') +const PackumentCache = require('../packument-cache.js') const mixins = [ require('../tracker.js'), @@ -82,7 +82,7 @@ class Arborist extends Base { installStrategy: options.global ? 'shallow' : (options.installStrategy ? options.installStrategy : 'hoisted'), lockfileVersion: lockfileVersion(options.lockfileVersion), packageLockOnly: !!options.packageLockOnly, - packumentCache: options.packumentCache || new Map(), + packumentCache: options.packumentCache || new PackumentCache(), path: options.path || '.', rebuildBundle: 'rebuildBundle' in options ? !!options.rebuildBundle : true, replaceRegistryHost: options.replaceRegistryHost, diff --git a/workspaces/arborist/lib/node.js b/workspaces/arborist/lib/node.js index bdc021b7c12a9..d5c5a478cce0b 100644 --- a/workspaces/arborist/lib/node.js +++ b/workspaces/arborist/lib/node.js @@ -119,6 +119,8 @@ class Node { // package's dependencies in a virtual root. this.sourceReference = sourceReference + // TODO if this came from pacote.manifest we don't have to do this, + // we can be told to skip this step const pkg = sourceReference ? sourceReference.package : normalize(options.pkg || {}) diff --git a/workspaces/arborist/lib/packument-cache.js b/workspaces/arborist/lib/packument-cache.js new file mode 100644 index 0000000000000..26e463eb9d334 --- /dev/null +++ b/workspaces/arborist/lib/packument-cache.js @@ -0,0 +1,77 @@ +const { LRUCache } = require('lru-cache') +const { getHeapStatistics } = require('node:v8') +const { log } = require('proc-log') + +// This is an in-memory cache that Pacote uses for packuments. +// Packuments are usually cached on disk. This allows for rapid re-requests +// of the same packument to bypass disk reads. The tradeoff here is memory +// usage for disk reads. +class PackumentCache extends LRUCache { + static #heapLimit = Math.floor(getHeapStatistics().heap_size_limit) + + #sizeKey + #disposed = new Set() + + #log (...args) { + log.silly('packumentCache', ...args) + } + + constructor ({ + // How much of this.#heapLimit to take up + heapFactor = 0.25, + // How much of this.#maxSize we allow any one packument to take up + // Anything over this is not cached + maxEntryFactor = 0.5, + sizeKey = '_contentLength', + } = {}) { + const maxSize = Math.floor(PackumentCache.#heapLimit * heapFactor) + const maxEntrySize = Math.floor(maxSize * maxEntryFactor) + super({ + maxSize, + maxEntrySize, + sizeCalculation: (p) => { + // Don't cache if we dont know the size + // Some versions of pacote set this to `0`, newer versions set it to `null` + if (!p[sizeKey]) { + return maxEntrySize + 1 + } + if (p[sizeKey] < 10_000) { + return p[sizeKey] * 2 + } + if (p[sizeKey] < 1_000_000) { + return Math.floor(p[sizeKey] * 1.5) + } + // It is less beneficial to store a small amount of super large things + // at the cost of all other packuments. + return maxEntrySize + 1 + }, + dispose: (v, k) => { + this.#disposed.add(k) + this.#log(k, 'dispose') + }, + }) + this.#sizeKey = sizeKey + this.#log(`heap:${PackumentCache.#heapLimit} maxSize:${maxSize} maxEntrySize:${maxEntrySize}`) + } + + set (k, v, ...args) { + // we use disposed only for a logging signal if we are setting packuments that + // have already been evicted from the cache previously. logging here could help + // us tune this in the future. + const disposed = this.#disposed.has(k) + /* istanbul ignore next - this doesnt happen consistently so hard to test without resorting to unit tests */ + if (disposed) { + this.#disposed.delete(k) + } + this.#log(k, 'set', `size:${v[this.#sizeKey]} disposed:${disposed}`) + return super.set(k, v, ...args) + } + + has (k, ...args) { + const has = super.has(k, ...args) + this.#log(k, `cache-${has ? 'hit' : 'miss'}`) + return has + } +} + +module.exports = PackumentCache diff --git a/workspaces/arborist/test/arborist/rebuild.js b/workspaces/arborist/test/arborist/rebuild.js index 2843155f2809e..7cf6c381197d2 100644 --- a/workspaces/arborist/test/arborist/rebuild.js +++ b/workspaces/arborist/test/arborist/rebuild.js @@ -197,7 +197,7 @@ t.test('verify dep flags in script environments', async t => { const file = resolve(path, 'node_modules', pkg, 'env') t.strictSame(flags, fs.readFileSync(file, 'utf8').split('\n'), pkg) } - t.strictSame(checkLogs().sort((a, b) => + t.strictSame(checkLogs().filter(l => l[0] === 'info' && l[1] === 'run').sort((a, b) => localeCompare(a[2], b[2]) || (typeof a[4] === 'string' ? -1 : 1)), [ ['info', 'run', 'devdep@1.0.0', 'postinstall', 'node_modules/devdep', 'node ../../env.js'], ['info', 'run', 'devdep@1.0.0', 'postinstall', { code: 0, signal: null }], diff --git a/workspaces/arborist/test/audit-report.js b/workspaces/arborist/test/audit-report.js index de2cd429c96cc..18751b9cd96a5 100644 --- a/workspaces/arborist/test/audit-report.js +++ b/workspaces/arborist/test/audit-report.js @@ -222,7 +222,7 @@ t.test('audit returns an error', async t => { const report = await AuditReport.load(tree, arb.options) t.equal(report.report, null, 'did not get audit response') t.equal(report.size, 0, 'did not find any vulnerabilities') - t.match(logs, [ + t.match(logs.filter(l => l[1].includes('audit')), [ [ 'silly', 'audit',