From edb8e9a3fa5869cfb935479a262f6f61b0a2ec57 Mon Sep 17 00:00:00 2001 From: isaacs Date: Tue, 24 Aug 2021 23:02:10 -0700 Subject: [PATCH] fix: perf regression on hot string munging path This doesn't change any functionality, but it optimizes a few extremely hot code paths for the input typically encountered during a large npm project installation. `String.normalize()` is cached, and trailing-slash removal is done with a single `String.slice()`, rather than multiple slices and `String.length` comparisons. It is extremely rare that any code path is ever hot enough for this kind of thing to be relevant enough to justify this sort of microoptimization, but these two issues resulted in a 25-50% install time increase in some cases, which is fairly dramatic. Fix: https://github.com/npm/cli/issues/3676 PR-URL: https://github.com/npm/node-tar/pull/286 Credit: @isaacs Close: #286 Reviewed-by: @wraithgar --- lib/normalize-unicode.js | 11 +++++++++++ lib/path-reservations.js | 9 ++++----- lib/strip-trailing-slashes.js | 31 ++++++++++--------------------- lib/unpack.js | 4 ++-- test/normalize-unicode.js | 12 ++++++++++++ test/strip-trailing-slashes.js | 1 + 6 files changed, 40 insertions(+), 28 deletions(-) create mode 100644 lib/normalize-unicode.js create mode 100644 test/normalize-unicode.js diff --git a/lib/normalize-unicode.js b/lib/normalize-unicode.js new file mode 100644 index 00000000..4aeb1d50 --- /dev/null +++ b/lib/normalize-unicode.js @@ -0,0 +1,11 @@ +// warning: extremely hot code path. +// This has been meticulously optimized for use +// within npm install on large package trees. +// Do not edit without careful benchmarking. +const normalizeCache = Object.create(null) +const {hasOwnProperty} = Object.prototype +module.exports = s => { + if (!hasOwnProperty.call(normalizeCache, s)) + normalizeCache[s] = s.normalize('NFKD') + return normalizeCache[s] +} diff --git a/lib/path-reservations.js b/lib/path-reservations.js index 8d0ead9b..8183c45f 100644 --- a/lib/path-reservations.js +++ b/lib/path-reservations.js @@ -7,7 +7,7 @@ // while still allowing maximal safe parallelization. const assert = require('assert') -const normPath = require('./normalize-windows-path.js') +const normalize = require('./normalize-unicode.js') const stripSlashes = require('./strip-trailing-slashes.js') const { join } = require('path') @@ -28,7 +28,7 @@ module.exports = () => { const getDirs = path => { const dirs = path.split('/').slice(0, -1).reduce((set, path) => { if (set.length) - path = normPath(join(set[set.length - 1], path)) + path = join(set[set.length - 1], path) set.push(path || '/') return set }, []) @@ -116,9 +116,8 @@ module.exports = () => { // So, we just pretend that every path matches every other path here, // effectively removing all parallelization on windows. paths = isWindows ? ['win32 parallelization disabled'] : paths.map(p => { - return stripSlashes(normPath(join(p))) - .normalize('NFKD') - .toLowerCase() + // don't need normPath, because we skip this entirely for windows + return normalize(stripSlashes(join(p))).toLowerCase() }) const dirs = new Set( diff --git a/lib/strip-trailing-slashes.js b/lib/strip-trailing-slashes.js index f702ed5a..3e3ecec5 100644 --- a/lib/strip-trailing-slashes.js +++ b/lib/strip-trailing-slashes.js @@ -1,24 +1,13 @@ -// this is the only approach that was significantly faster than using -// str.replace(/\/+$/, '') for strings ending with a lot of / chars and -// containing multiple / chars. -const batchStrings = [ - '/'.repeat(1024), - '/'.repeat(512), - '/'.repeat(256), - '/'.repeat(128), - '/'.repeat(64), - '/'.repeat(32), - '/'.repeat(16), - '/'.repeat(8), - '/'.repeat(4), - '/'.repeat(2), - '/', -] - +// warning: extremely hot code path. +// This has been meticulously optimized for use +// within npm install on large package trees. +// Do not edit without careful benchmarking. module.exports = str => { - for (const s of batchStrings) { - while (str.length >= s.length && str.slice(-1 * s.length) === s) - str = str.slice(0, -1 * s.length) + let i = str.length - 1 + let slashesStart = -1 + while (i > -1 && str.charAt(i) === '/') { + slashesStart = i + i-- } - return str + return slashesStart === -1 ? str : str.slice(0, slashesStart) } diff --git a/lib/unpack.js b/lib/unpack.js index 7f397f10..7d39dc0f 100644 --- a/lib/unpack.js +++ b/lib/unpack.js @@ -17,6 +17,7 @@ const pathReservations = require('./path-reservations.js') const stripAbsolutePath = require('./strip-absolute-path.js') const normPath = require('./normalize-windows-path.js') const stripSlash = require('./strip-trailing-slashes.js') +const normalize = require('./normalize-unicode.js') const ONENTRY = Symbol('onEntry') const CHECKFS = Symbol('checkFs') @@ -101,8 +102,7 @@ const uint32 = (a, b, c) => // Note that on windows, we always drop the entire cache whenever a // symbolic link is encountered, because 8.3 filenames are impossible // to reason about, and collisions are hazards rather than just failures. -const cacheKeyNormalize = path => stripSlash(normPath(path)) - .normalize('NFKD') +const cacheKeyNormalize = path => normalize(stripSlash(normPath(path))) .toLowerCase() const pruneCache = (cache, abs) => { diff --git a/test/normalize-unicode.js b/test/normalize-unicode.js new file mode 100644 index 00000000..c5d8be08 --- /dev/null +++ b/test/normalize-unicode.js @@ -0,0 +1,12 @@ +const t = require('tap') +const normalize = require('../lib/normalize-unicode.js') + +// cafeĢ +const cafe1 = Buffer.from([0x63, 0x61, 0x66, 0xc3, 0xa9]).toString() + +// cafe with a ` +const cafe2 = Buffer.from([0x63, 0x61, 0x66, 0x65, 0xcc, 0x81]).toString() + +t.equal(normalize(cafe1), normalize(cafe2), 'matching unicodes') +t.equal(normalize(cafe1), normalize(cafe2), 'cached') +t.equal(normalize('foo'), 'foo', 'non-unicdoe string') diff --git a/test/strip-trailing-slashes.js b/test/strip-trailing-slashes.js index 198797bf..ce0695f8 100644 --- a/test/strip-trailing-slashes.js +++ b/test/strip-trailing-slashes.js @@ -3,5 +3,6 @@ const stripSlash = require('../lib/strip-trailing-slashes.js') const short = '///a///b///c///' const long = short.repeat(10) + '/'.repeat(1000000) +t.equal(stripSlash('no slash'), 'no slash') t.equal(stripSlash(short), '///a///b///c') t.equal(stripSlash(long), short.repeat(9) + '///a///b///c')