diff --git a/workspaces/arborist/lib/arborist/build-ideal-tree.js b/workspaces/arborist/lib/arborist/build-ideal-tree.js index 9631a3eca890f..ce26ba5ebdf3b 100644 --- a/workspaces/arborist/lib/arborist/build-ideal-tree.js +++ b/workspaces/arborist/lib/arborist/build-ideal-tree.js @@ -1,19 +1,23 @@ -// mixin implementing the buildIdealTree method +// XXX test/shrinkwrap.js tests fail if this is not first. Something weird is going on here +const { readdirScoped } = require('@npmcli/fs') + +// mixin implementing the buildIdealTree, loadVirtual, and loadActual methods const localeCompare = require('@isaacs/string-locale-compare')('en') const PackageJson = require('@npmcli/package-json') const npa = require('npm-package-arg') const pacote = require('pacote') const cacache = require('cacache') const { callLimit: promiseCallLimit } = require('promise-call-limit') -const realpath = require('../../lib/realpath.js') -const { resolve, dirname, sep } = require('node:path') -const treeCheck = require('../tree-check.js') -const { readdirScoped } = require('@npmcli/fs') +const { dirname, join, normalize, relative, resolve, sep } = require('node:path') const { lstat, readlink } = require('node:fs/promises') const { depth } = require('treeverse') const { log, time } = require('proc-log') const { redact } = require('@npmcli/redact') const semver = require('semver') +const mapWorkspaces = require('@npmcli/map-workspaces') +const nameFromFolder = require('@npmcli/name-from-folder') +const { walkUp } = require('walk-up-path') +const ancestorPath = require('common-ancestor-path') const { OK, @@ -22,6 +26,7 @@ const { } = require('../can-place-dep.js') const PlaceDep = require('../place-dep.js') +const realpath = require('../realpath.js') const debug = require('../debug.js') const fromPath = require('../from-path.js') const calcDepFlags = require('../calc-dep-flags.js') @@ -34,17 +39,16 @@ const optionalSet = require('../optional-set.js') const { checkEngine, checkPlatform } = require('npm-install-checks') const relpath = require('../relpath.js') const resetDepFlags = require('../reset-dep-flags.js') +const consistentResolve = require('../consistent-resolve.js') +const treeCheck = require('../tree-check.js') -// note: some of these symbols are shared so we can hit -// them with unit tests and reuse them across mixins -const _updateAll = Symbol.for('updateAll') -const _flagsSuspect = Symbol.for('flagsSuspect') +// note: some of these symbols are shared so we can hit them with unit tests and reuse them across mixins +// TODO we should not do this +const _changePath = Symbol.for('_changePath') +const _resolvedAdd = Symbol.for('resolvedAdd') const _setWorkspaces = Symbol.for('setWorkspaces') +const _updateAll = Symbol.for('updateAll') const _updateNames = Symbol.for('updateNames') -const _resolvedAdd = Symbol.for('resolvedAdd') -const _usePackageLock = Symbol.for('usePackageLock') -const _rpcache = Symbol.for('realpathCache') -const _stcache = Symbol.for('statCache') // used by Reify mixin const _addNodeToTrashList = Symbol.for('addNodeToTrashList') @@ -78,11 +82,19 @@ class DepsQueue { } module.exports = cls => class IdealTreeBuilder extends cls { + #actualTree + // ensure when walking the tree that we don't call loadTree on the same actual node more than one time. + #actualTreeLoaded = new Set() + #actualTreePromise + // cache of nodes when loading the actualTree, so that we avoid loaded the same node multiple times when symlinks attack. + #cache = new Map() #complete #currentDep = null #depsQueue = new DepsQueue() #depsSeen = new Set() #explicitRequests = new Set() + #filter + #flagsSuspect #follow #installStrategy #linkNodes = new Set() @@ -95,41 +107,34 @@ module.exports = cls => class IdealTreeBuilder extends cls { #peerSetSource = new WeakMap() #preferDedupe = false #prune + #realpathCache + #rootOptionProvided + #statCache = new Map() #strictPeerDeps + // cache of link targets for setting fsParent links + // We don't do fsParent as a magic getter/setter, because it'd be too costly to keep up to date along the walk. + // And, we know that it can ONLY be relevant when the node is a target of a link; otherwise, it'd be in a node_modules folder, so take advantage of that to limit the scans later. + #topNodes = new Set() + #transplantFilter #virtualRoots = new Map() constructor (options) { super(options) - // normalize trailing slash - const registry = options.registry || 'https://registry.npmjs.org' - options.registry = this.registry = registry.replace(/\/+$/, '') + '/' - const { follow = false, installStrategy = 'hoisted', - idealTree = null, - installLinks = false, - legacyPeerDeps = false, - packageLock = true, strictPeerDeps = false, - workspaces, global, } = options this.#strictPeerDeps = !!strictPeerDeps - - this.idealTree = idealTree - this.installLinks = installLinks - this.legacyPeerDeps = legacyPeerDeps - - this[_usePackageLock] = packageLock this.#installStrategy = global ? 'shallow' : installStrategy this.#follow = !!follow - - if (workspaces?.length && global) { - throw new Error('Cannot operate on workspaces in global mode') - } + // caches for cached realpath calls + const cwd = process.cwd() + // assume that the cwd is real enough for our purposes + this.#realpathCache = new Map([[cwd, cwd]]) this[_updateAll] = false this[_updateNames] = [] @@ -164,11 +169,9 @@ module.exports = cls => class IdealTreeBuilder extends cls { throw new Error('global requires add, rm, or update option') } - // first get the virtual tree, if possible. If there's a lockfile, then - // that defines the ideal tree, unless the root package.json is not - // satisfied by what the ideal tree provides. - // from there, we start adding nodes to it to satisfy the deps requested - // by the package.json in the root. + // First get the virtual tree, if possible. + // If there's a lockfile, then that defines the ideal tree, unless the root package.json is not satisfied by what the ideal tree provides. + // From there, we start adding nodes to it to satisfy the deps requested by the package.json in the root. this.#parseSettings(options) @@ -298,7 +301,7 @@ module.exports = cls => class IdealTreeBuilder extends cls { .then(root => { if (this.options.global) { return root - } else if (!this[_usePackageLock] || this[_updateAll]) { + } else if (!this.options.usePackageLock || this[_updateAll]) { return Shrinkwrap.reset({ path: this.path, lockfileVersion: this.options.lockfileVersion, @@ -388,7 +391,7 @@ module.exports = cls => class IdealTreeBuilder extends cls { // before ever loading trees. // TODO: make buildIdealTree() and loadActual handle a missing root path, // or a symlink to a missing target, and let reify() create it as needed. - const real = await realpath(this.path, this[_rpcache], this[_stcache]) + const real = await realpath(this.path, this.#realpathCache, this.#statCache) const Cls = real === this.path ? Node : Link const root = new Cls({ path: this.path, @@ -550,7 +553,7 @@ module.exports = cls => class IdealTreeBuilder extends cls { spec.name = name } else if (spec.type === 'directory') { try { - const real = await realpath(spec.fetchSpec, this[_rpcache], this[_stcache]) + const real = await realpath(spec.fetchSpec, this.#realpathCache, this.#statCache) spec = npa(`file:${relpath(path, real)}`, path) spec.name = name } catch { @@ -792,7 +795,48 @@ This is a one-time fix-up, please be patient... } if (!this.#depsQueue.length) { - return this.#resolveLinks() + // go through all the links in the this.#linkNodes set + // for each one: + // - if outside the root, ignore it, assume it's fine, it's not our problem + // - if a node in the tree already, assign the target to that node. + // - if a path under an existing node, then assign that as the fsParent, + // and add it to the _depsQueue + // + // call buildDepStep if anything was added to the queue; otherwise, we're done + for (const link of this.#linkNodes) { + this.#linkNodes.delete(link) + + // link we never ended up placing, skip it + if (link.root !== this.idealTree) { + continue + } + + const tree = this.idealTree.target + const external = !link.target.isDescendantOf(tree) + + // outside the root, somebody else's problem, ignore it + if (external && !this.#follow) { + continue + } + + // didn't find a parent for it or it has not been seen yet + // so go ahead and process it. + const unseenLink = (link.target.parent || link.target.fsParent) && + !this.#depsSeen.has(link.target) + + if (this.#follow && + !link.target.parent && + !link.target.fsParent || + unseenLink) { + this.addTracker('idealTree', link.target.name, link.target.location) + this.#depsQueue.push(link.target) + } + } + + if (this.#depsQueue.length) { + return this.#buildDepStep() + } + return } const node = this.#depsQueue.pop() @@ -1231,7 +1275,7 @@ This is a one-time fix-up, please be patient... } } - #nodeFromSpec (name, spec, parent, edge) { + async #nodeFromSpec (name, spec, parent, edge) { // pacote will slap integrity on its options, so we have to clone // the object so it doesn't get mutated. // Don't bother to load the manifest for link deps, because the target @@ -1260,7 +1304,13 @@ This is a one-time fix-up, please be patient... // Decide whether to link or copy the dependency const shouldLink = (isWorkspace || isProjectInternalFileSpec || !installLinks) && !isTransitiveFileDep if (spec.type === 'directory' && shouldLink) { - return this.#linkFromSpec(name, spec, parent, edge) + const realpath = spec.fetchSpec + const { content: pkg } = await PackageJson.normalize(realpath).catch(() => { + return { content: {} } + }) + const link = new Link({ name, parent, realpath, pkg, installLinks, legacyPeerDeps }) + this.#linkNodes.add(link) + return link } // if the spec matches a workspace name, then see if the workspace node will satisfy the edge. if it does, we return the workspace node to make sure it takes priority. @@ -1301,17 +1351,6 @@ This is a one-time fix-up, please be patient... }) } - async #linkFromSpec (name, spec, parent) { - const realpath = spec.fetchSpec - const { installLinks, legacyPeerDeps } = this - const { content: pkg } = await PackageJson.normalize(realpath).catch(() => { - return { content: {} } - }) - const link = new Link({ name, parent, realpath, pkg, installLinks, legacyPeerDeps }) - this.#linkNodes.add(link) - return link - } - // load all peer deps and meta-peer deps into the node's parent // At the end of this, the node's peer-type outward edges are all // resolved, and so are all of theirs, but other dep types are not. @@ -1438,54 +1477,9 @@ This is a one-time fix-up, please be patient... } } - // go through all the links in the this.#linkNodes set - // for each one: - // - if outside the root, ignore it, assume it's fine, it's not our problem - // - if a node in the tree already, assign the target to that node. - // - if a path under an existing node, then assign that as the fsParent, - // and add it to the _depsQueue - // - // call buildDepStep if anything was added to the queue; otherwise, we're done - #resolveLinks () { - for (const link of this.#linkNodes) { - this.#linkNodes.delete(link) - - // link we never ended up placing, skip it - if (link.root !== this.idealTree) { - continue - } - - const tree = this.idealTree.target - const external = !link.target.isDescendantOf(tree) - - // outside the root, somebody else's problem, ignore it - if (external && !this.#follow) { - continue - } - - // didn't find a parent for it or it has not been seen yet - // so go ahead and process it. - const unseenLink = (link.target.parent || link.target.fsParent) && - !this.#depsSeen.has(link.target) - - if (this.#follow && - !link.target.parent && - !link.target.fsParent || - unseenLink) { - this.addTracker('idealTree', link.target.name, link.target.location) - this.#depsQueue.push(link.target) - } - } - - if (this.#depsQueue.length) { - return this.#buildDepStep() - } - } - #fixDepFlags () { const timeEnd = time.start('idealTree:fixDepFlags') const metaFromDisk = this.idealTree.meta.loadedFromDisk - const flagsSuspect = this[_flagsSuspect] const mutateTree = this.#mutateTree // if the options set prune:false, then we don't prune, but we still // mark the extraneous items in the tree if we modified it at all. @@ -1516,7 +1510,7 @@ This is a one-time fix-up, please be patient... // if we started from a shrinkwrap, and then added/removed something, // then the tree is suspect. Prune what is marked as extraneous. // otherwise, don't bother. - const needPrune = metaFromDisk && (mutateTree || flagsSuspect) + const needPrune = metaFromDisk && (mutateTree || this.#flagsSuspect) if (this.#prune && needPrune) { this.#idealTreePrune() } @@ -1596,4 +1590,621 @@ This is a one-time fix-up, please be patient... return this.reify(options) } + + // public method + async loadVirtual (options = {}) { + if (this.virtualTree) { + return this.virtualTree + } + + // allow the user to set reify options on the ctor as well. + // XXX: deprecate separate instance method options object. + options = { ...this.options, ...options } + + if (options.root && options.root.meta) { + await this.#loadFromShrinkwrap(options.root.meta, options.root) + return treeCheck(this.virtualTree) + } + + const s = await Shrinkwrap.load({ + path: this.path, + lockfileVersion: this.options.lockfileVersion, + resolveOptions: this.options, + }) + if (!s.loadedFromDisk && !options.root) { + const er = new Error('loadVirtual requires existing shrinkwrap file') + throw Object.assign(er, { code: 'ENOLOCK' }) + } + + // when building the ideal tree, we pass in a root node to this function otherwise, load it from the root package json or the lockfile + const pkg = await PackageJson.normalize(this.path).then(p => p.content).catch(() => s.data.packages[''] || {}) + // TODO clean this up + const { + root = await this[_setWorkspaces](this.#loadNode('', pkg, true)), + } = options + this.#rootOptionProvided = options.root + + await this.#loadFromShrinkwrap(s, root) + root.assertRootOverrides() + return treeCheck(this.virtualTree) + } + + async #loadFromShrinkwrap (s, root) { + if (!this.#rootOptionProvided) { + // root is never any of these things, but might be a brand new baby Node object that never had its dep flags calculated. + root.unsetDepFlags() + } else { + this.#flagsSuspect = true + } + + this.#checkRootEdges(s, root) + root.meta = s + this.virtualTree = root + // separate out link metadata, and create Node objects for nodes + // links is the set of metadata, and nodes is the map of non-Link nodes + const links = new Map() + const nodes = new Map([['', root]]) + for (const [location, meta] of Object.entries(s.data.packages)) { + // skip the root because we already got it + if (!location) { + continue + } + + if (meta.link) { + links.set(location, meta) + } else { + nodes.set(location, this.#loadNode(location, meta)) + } + } + // set the targets to nodes in the set, if we have them (we might not) + for (const [location, meta] of links.entries()) { + const targetPath = resolve(this.path, meta.resolved) + const targetLoc = relpath(this.path, targetPath) + const target = nodes.get(targetLoc) + + if (!target) { + const err = new Error( +`Missing target in lock file: "${targetLoc}" is referenced by "${location}" but does not exist. +To fix: +1. rm package-lock.json +2. npm install` + ) + err.code = 'EMISSINGTARGET' + throw err + } + + const link = new Link({ + installLinks: this.installLinks, + legacyPeerDeps: this.legacyPeerDeps, + path: resolve(this.path, location), + realpath: resolve(this.path, targetLoc), + target, + pkg: target && target.package, + }) + link.extraneous = target.extraneous + link.devOptional = target.devOptional + link.peer = target.peer + link.optional = target.optional + link.dev = target.dev + nodes.set(location, link) + nodes.set(targetLoc, link.target) + + // we always need to read the package.json for link targets + // outside node_modules because they can be changed by the local user + if (!link.target.parent) { + await PackageJson.normalize(link.realpath).then(p => link.target.package = p.content).catch(() => null) + } + } + if (!(s.originalLockfileVersion >= 2)) { + this.#assignBundles(nodes) + } + if (this.#flagsSuspect) { + // reset all dep flags + // can't use inventory here, because virtualTree might not be root + for (const node of nodes.values()) { + if (node.isRoot || node === this.#rootOptionProvided) { + continue + } + node.resetDepFlags() + } + calcDepFlags(this.virtualTree, !this.#rootOptionProvided) + } + return root + } + + // check the lockfile deps, and see if they match. if they do not then we have to reset dep flags at the end. + // for example, if the user manually edits their package.json file, then we need to know that the idealTree is no longer entirely trustworthy. + #checkRootEdges (s, root) { + // loaded virtually from tree, no chance of being out of sync ancient lockfiles are critically damaged by this process, so we need to just hope for the best in those cases. + if (!s.loadedFromDisk || s.ancientLockfile) { + return + } + + const lock = s.get('') + const prod = lock.dependencies || {} + const dev = lock.devDependencies || {} + const optional = lock.optionalDependencies || {} + const peer = lock.peerDependencies || {} + const peerOptional = {} + + if (lock.peerDependenciesMeta) { + for (const [name, meta] of Object.entries(lock.peerDependenciesMeta)) { + if (meta.optional && peer[name] !== undefined) { + peerOptional[name] = peer[name] + delete peer[name] + } + } + } + + for (const name of Object.keys(optional)) { + delete prod[name] + } + + const lockWS = {} + const workspaces = mapWorkspaces.virtual({ + cwd: this.path, + lockfile: s.data, + }) + + for (const [name, path] of workspaces.entries()) { + lockWS[name] = `file:${path}` + } + + // Should rootNames exclude optional? + const rootNames = new Set(root.edgesOut.keys()) + + const lockByType = ({ dev, optional, peer, peerOptional, prod, workspace: lockWS }) + + // Find anything in shrinkwrap deps that doesn't match root's type or spec + for (const type in lockByType) { + const deps = lockByType[type] + for (const name in deps) { + const edge = root.edgesOut.get(name) + if (!edge || edge.type !== type || edge.spec !== deps[name]) { + return this.#flagsSuspect = true + } + rootNames.delete(name) + } + } + // Something was in root that's not accounted for in shrinkwrap + if (rootNames.size) { + return this.#flagsSuspect = true + } + } + + #assignBundles (nodes) { + for (const [location, node] of nodes) { + // Skip assignment of parentage for the root package + if (!location || node.isLink && !node.target.location) { + continue + } + const { name, parent, package: { inBundle } } = node + + if (!parent) { + continue + } + + // read inBundle from package because 'package' here is + // actually a v2 lockfile metadata entry. + // If the *parent* is also bundled, though, or if the parent has + // no dependency on it, then we assume that it's being pulled in + // just by virtue of its parent or a transitive dep being bundled. + const { package: ppkg } = parent + const { inBundle: parentBundled } = ppkg + if (inBundle && !parentBundled && parent.edgesOut.has(node.name)) { + if (!ppkg.bundleDependencies) { + ppkg.bundleDependencies = [name] + } else { + ppkg.bundleDependencies.push(name) + } + } + } + } + + #loadNode (location, sw, loadOverrides) { + const p = this.virtualTree ? this.virtualTree.realpath : this.path + const path = resolve(p, location) + // shrinkwrap doesn't include package name unless necessary + if (!sw.name) { + sw.name = nameFromFolder(path) + } + + const node = new Node({ + installLinks: this.installLinks, + legacyPeerDeps: this.legacyPeerDeps, + root: this.virtualTree, + path, + realpath: path, + integrity: sw.integrity, + resolved: consistentResolve(sw.resolved, this.path, path), + pkg: sw, + hasShrinkwrap: sw.hasShrinkwrap, + loadOverrides, + // cast to boolean because they're undefined in the lock file when false + extraneous: !!sw.extraneous, + devOptional: !!(sw.devOptional || sw.dev || sw.optional), + peer: !!sw.peer, + optional: !!sw.optional, + dev: !!sw.dev, + }) + + return node + } + + // public method + // TODO remove options param in next semver major + async loadActual (options = {}) { + // In the past this.actualTree was set as a promise that eventually resolved, and overwrote this.actualTree with the resolved value. + // This was a problem because virtually no other code expects this.actualTree to be a promise. + // Instead we only set it once resolved, and also return it from the promise so that it is what's returned from this function when awaited. + if (this.actualTree) { + return this.actualTree + } + if (!this.#actualTreePromise) { + // allow the user to set options on the ctor as well. + // XXX: deprecate separate method options objects. + options = { ...this.options, ...options } + + this.#actualTreePromise = this.#loadActual(options) + .then(tree => { + // reset all deps to extraneous prior to recalc + if (!options.root) { + for (const node of tree.inventory.values()) { + node.extraneous = true + } + } + + // only reset root flags if we're not re-rooting, + // otherwise leave as-is + calcDepFlags(tree, !options.root) + this.actualTree = treeCheck(tree) + return this.actualTree + }) + } + // return the promise so that we don't ever have more than one going at the same time. + // This is so that buildIdealTree can default to the actualTree if no shrinkwrap present, but reify() can still call buildIdealTree and loadActual in parallel safely. + // "buildIdealTree can default to the actualTree" where is this? + // XXX this.#actualTree vs this.actualTree? + return this.#actualTreePromise + } + + async #loadActual (options) { + // mostly realpath to throw if the root doesn't exist + const { + global, + filter = () => true, + root = null, + transplantFilter = () => true, + ignoreMissing = false, + forceActual = false, + } = options + this.#filter = filter + this.#transplantFilter = transplantFilter + + if (global) { + const real = await realpath(this.path, this.#realpathCache, this.#statCache) + const params = { + path: this.path, + realpath: real, + pkg: {}, + global, + loadOverrides: true, + } + if (this.path === real) { + this.#actualTree = this.#newNode(params) + } else { + this.#actualTree = await this.#newLink(params) + } + } else { + // not in global mode, hidden lockfile is allowed, load root pkg too + this.#actualTree = await this.#loadFSNode({ + path: this.path, + real: await realpath(this.path, this.#realpathCache, this.#statCache), + loadOverrides: true, + }) + + this.#actualTree.assertRootOverrides() + + // if forceActual is set, don't even try the hidden lockfile + if (!forceActual) { + // Note: hidden lockfile will be rejected if it's not the latest thing + // in the folder, or if any of the entries in the hidden lockfile are + // missing. + const meta = await Shrinkwrap.load({ + path: this.#actualTree.path, + hiddenLockfile: true, + resolveOptions: this.options, + }) + + if (meta.loadedFromDisk) { + this.#actualTree.meta = meta + // have to load on a new Arborist object, so we don't assign + // the virtualTree on this one! Also, the weird reference is because + // we can't easily get a ref to Arborist in this module, without + // creating a circular reference, since this class is a mixin used + // to build up the Arborist class itself. + await new this.constructor({ ...this.options }).loadVirtual({ + root: this.#actualTree, + }) + await this[_setWorkspaces](this.#actualTree) + + this.#transplant(root) + return this.#actualTree + } + } + + const meta = await Shrinkwrap.load({ + path: this.#actualTree.path, + lockfileVersion: this.options.lockfileVersion, + resolveOptions: this.options, + }) + this.#actualTree.meta = meta + } + + await this.#loadFSTree(this.#actualTree) + await this[_setWorkspaces](this.#actualTree) + + // if there are workspace targets without Link nodes created, load + // the targets, so that we know what they are. + if (this.#actualTree.workspaces && this.#actualTree.workspaces.size) { + const promises = [] + for (const path of this.#actualTree.workspaces.values()) { + if (!this.#cache.has(path)) { + // workspace overrides use the root overrides + const p = this.#loadFSNode({ path, root: this.#actualTree, useRootOverrides: true }) + .then(node => this.#loadFSTree(node)) + promises.push(p) + } + } + await Promise.all(promises) + } + + if (!ignoreMissing) { + await this.#findMissingEdges() + } + + // try to find a node that is the parent in a fs tree sense, but not a + // node_modules tree sense, of any link targets. this allows us to + // resolve deps that node will find, but a legacy npm view of the + // world would not have noticed. + for (const path of this.#topNodes) { + const node = this.#cache.get(path) + if (node && !node.parent && !node.fsParent) { + for (const p of walkUp(dirname(path))) { + if (this.#cache.has(p)) { + node.fsParent = this.#cache.get(p) + break + } + } + } + } + + this.#transplant(root) + + if (global) { + // need to depend on the children, or else all of them + // will end up being flagged as extraneous, since the + // global root isn't a "real" project + const tree = this.#actualTree + const actualRoot = tree.isLink ? tree.target : tree + const { dependencies = {} } = actualRoot.package + for (const [name, kid] of actualRoot.children.entries()) { + const def = kid.isLink ? `file:${kid.realpath}` : '*' + dependencies[name] = dependencies[name] || def + } + actualRoot.package = { ...actualRoot.package, dependencies } + } + return this.#actualTree + } + + #transplant (root) { + if (!root || root === this.#actualTree) { + return + } + + this.#actualTree[_changePath](root.path) + for (const node of this.#actualTree.children.values()) { + if (!this.#transplantFilter(node)) { + node.root = null + } + } + + root.replace(this.#actualTree) + for (const node of this.#actualTree.fsChildren) { + node.root = this.#transplantFilter(node) ? root : null + } + + this.#actualTree = root + } + + async #loadFSNode ({ path, parent, real, root, loadOverrides, useRootOverrides }) { + if (!real) { + try { + real = await realpath(path, this.#realpathCache, this.#statCache) + } catch (error) { + // if realpath fails, just provide a dummy error node + return new Node({ + error, + path, + realpath: path, + parent, + root, + loadOverrides, + }) + } + } + + const cached = this.#cache.get(path) + let node + // missing edges get a dummy node, assign the parent and return it + if (cached && !cached.dummy) { + cached.parent = parent + return cached + } else { + const params = { + installLinks: this.installLinks, + legacyPeerDeps: this.legacyPeerDeps, + path, + realpath: real, + parent, + root, + loadOverrides, + } + + try { + const { content: pkg } = await PackageJson.normalize(real) + params.pkg = pkg + if (useRootOverrides && root.overrides) { + params.overrides = root.overrides.getNodeRule({ name: pkg.name, version: pkg.version }) + } + } catch (err) { + if (err.code === 'EJSONPARSE') { + // TODO @npmcli/package-json should be doing this + err.path = join(real, 'package.json') + } + params.error = err + } + + // soldier on if read-package-json raises an error, passing it to the + // Node which will attach it to its errors array (Link passes it along to + // its target node) + if (normalize(path) === real) { + node = this.#newNode(params) + } else { + node = await this.#newLink(params) + } + } + this.#cache.set(path, node) + return node + } + + #newNode (options) { + // check it for an fsParent if it's a tree top. there's a decent chance + // it'll get parented later, making the fsParent scan a no-op, but better + // safe than sorry, since it's cheap. + const { parent, realpath } = options + if (!parent) { + this.#topNodes.add(realpath) + } + return new Node(options) + } + + async #newLink (options) { + const { realpath } = options + this.#topNodes.add(realpath) + const target = this.#cache.get(realpath) + const link = new Link({ ...options, target }) + + if (!target) { + // Link set its target itself in this case + this.#cache.set(realpath, link.target) + // if a link target points at a node outside of the root tree's + // node_modules hierarchy, then load that node as well. + await this.#loadFSTree(link.target) + } + + return link + } + + async #loadFSTree (node) { + const did = this.#actualTreeLoaded + if (!node.isLink && !did.has(node.target.realpath)) { + did.add(node.target.realpath) + // create child nodes for all the entries in node_modules and attach them to the node as a parent + const nm = resolve(node.target.realpath, 'node_modules') + try { + const kids = await readdirScoped(nm).then(paths => paths.map(p => p.replace(/\\/g, '/'))) + await Promise.all( + // ignore . dirs and retired scoped package folders + kids.filter(kid => !/^(@[^/]+\/)?\./.test(kid)) + .filter(kid => this.#filter(node.target, kid)) + .map(kid => this.#loadFSNode({ + parent: node.target, + path: resolve(nm, kid), + }))) + } catch { + // error in the readdir is not fatal, just means no kids + } + return Promise.all( + [...node.target.children.entries()] + .filter(([, kid]) => !did.has(kid.realpath)) + .map(([, kid]) => this.#loadFSTree(kid)) + ) + } + } + + async #findMissingEdges () { + // try to resolve any missing edges by walking up the directory tree, + // checking for the package in each node_modules folder. stop at the + // root directory. + // The tricky move here is that we load a "dummy" node for the folder + // containing the node_modules folder, so that it can be assigned as + // the fsParent. It's a bad idea to *actually* load that full node, + // because people sometimes develop in ~/projects/node_modules/... + // so we'd end up loading a massive tree with lots of unrelated junk. + const nmContents = new Map() + const tree = this.#actualTree + for (const node of tree.inventory.values()) { + const ancestor = ancestorPath(node.realpath, this.path) + + const depPromises = [] + for (const [name, edge] of node.edgesOut.entries()) { + const notMissing = !edge.missing && + !(edge.to && (edge.to.dummy || edge.to.parent !== node)) + if (notMissing) { + continue + } + + // start the walk from the dirname, because we would have found + // the dep in the loadFSTree step already if it was local. + for (const p of walkUp(dirname(node.realpath))) { + // only walk as far as the nearest ancestor + // this keeps us from going into completely unrelated + // places when a project is just missing something, but + // allows for finding the transitive deps of link targets. + // ie, if it has to go up and back out to get to the path + // from the nearest common ancestor, we've gone too far. + if (ancestor && /^\.\.(?:[\\/]|$)/.test(relative(ancestor, p))) { + break + } + + let entries + if (!nmContents.has(p)) { + entries = await readdirScoped(p + '/node_modules') + .catch(() => []).then(paths => paths.map(p => p.replace(/\\/g, '/'))) + nmContents.set(p, entries) + } else { + entries = nmContents.get(p) + } + + if (!entries.includes(name)) { + continue + } + + let d + if (!this.#cache.has(p)) { + d = new Node({ path: p, root: node.root, dummy: true }) + this.#cache.set(p, d) + } else { + d = this.#cache.get(p) + } + if (d.dummy) { + // it's a placeholder, so likely would not have loaded this dep, + // unless another dep in the tree also needs it. + const depPath = normalize(`${p}/node_modules/${name}`) + const cached = this.#cache.get(depPath) + if (!cached || cached.dummy) { + depPromises.push(this.#loadFSNode({ + path: depPath, + root: node.root, + parent: d, + }).then(node => this.#loadFSTree(node))) + } + } + break + } + } + await Promise.all(depPromises) + } + } } diff --git a/workspaces/arborist/lib/arborist/index.js b/workspaces/arborist/lib/arborist/index.js index 3622f957b7acd..0d03913802763 100644 --- a/workspaces/arborist/lib/arborist/index.js +++ b/workspaces/arborist/lib/arborist/index.js @@ -3,28 +3,25 @@ // - virtual // - ideal // -// The actual tree is what's present on disk in the node_modules tree -// and elsewhere that links may extend. +// The actual tree is what's present on disk in the node_modules tree and elsewhere that links may extend. // // The virtual tree is loaded from metadata (package.json and lock files). // -// The ideal tree is what we WANT that actual tree to become. This starts -// with the virtual tree, and then applies the options requesting -// add/remove/update actions. +// The ideal tree is what we WANT that actual tree to become. This starts with the virtual tree, and then applies the options requesting add/remove/update actions. // -// To reify a tree, we calculate a diff between the ideal and actual trees, -// and then turn the actual tree into the ideal tree by taking the actions -// required. At the end of the reification process, the actualTree is -// updated to reflect the changes. +// To reify a tree, we calculate a diff between the ideal and actual trees, and then turn the actual tree into the ideal tree by taking the actions required. +// At the end of the reification process, the actualTree is updated to reflect the changes. // -// Each tree has an Inventory at the root. Shrinkwrap is tracked by Arborist -// instance. It always refers to the actual tree, but is updated (and written -// to disk) on reification. +// Each tree has an Inventory at the root. +// Shrinkwrap is tracked by Arborist instance. +// It always refers to the actual tree, but is updated (and written to disk) on reification. -// Each of the mixin "classes" adds functionality, but are not dependent on -// constructor call order. So, we just load them in an array, and build up -// the base class, so that the overall voltron class is easier to test and -// cover, and separation of concerns can be maintained. +// Each of the mixin "classes" adds functionality, but are not dependent on constructor call order. +// So, we just load them in an array, and build up the base class, so that the overall voltron class is easier to test and cover, and separation of concerns can be maintained. +// Eventually, each mixin shared enough `Symbol.for` declarations that separation of concerns was not actually happening very well. +// Unit testing was also masking bugs that surfaced when the code was used as a whole, so tests all require Arborist in full now. +// So, this is now moving towards a single large class so that it's easier to weed out duplication in logic and efforts. +// Tests are still separated by main instance method. const { resolve } = require('node:path') const { homedir } = require('node:os') @@ -39,9 +36,6 @@ const PackumentCache = require('../packument-cache.js') const mixins = [ require('../tracker.js'), require('./build-ideal-tree.js'), - require('./load-actual.js'), - require('./load-virtual.js'), - require('./rebuild.js'), require('./reify.js'), require('./isolated-reifier.js'), ] @@ -68,6 +62,20 @@ class Arborist extends Base { constructor (options = {}) { const timeEnd = time.start('arborist:ctor') super(options) + + if (options.workspaces?.length && options.global) { + throw new Error('Cannot operate on workspaces in global mode') + } + // normalize trailing slash + const registry = options.registry || 'https://registry.npmjs.org' + options.registry = this.registry = registry.replace(/\/+$/, '') + '/' + + // the tree of nodes on disk + this.actualTree = options.actualTree + this.idealTree = 'idealTree' in options ? options.idealTree : null + this.installLinks = 'installLinks' in options ? options.installLinks : false + this.legacyPeerDeps = 'legacyPeerDeps' in options ? options.legacyPeerDeps : false + this.options = { nodeVersion: process.version, ...options, @@ -88,12 +96,12 @@ class Arborist extends Base { replaceRegistryHost: options.replaceRegistryHost, savePrefix: 'savePrefix' in options ? options.savePrefix : '^', scriptShell: options.scriptShell, + usePackageLock: 'packageLock' in options ? options.packageLock : true, workspaces: options.workspaces || [], workspacesEnabled: options.workspacesEnabled !== false, } - // TODO we only ever look at this.options.replaceRegistryHost, not - // this.replaceRegistryHost. Defaulting needs to be written back to - // this.options to work properly + // TODO we only ever look at this.options.replaceRegistryHost, not this.replaceRegistryHost. + // Defaulting needs to be written back to this.options to work properly this.replaceRegistryHost = this.options.replaceRegistryHost = (!this.options.replaceRegistryHost || this.options.replaceRegistryHost === 'npmjs') ? 'registry.npmjs.org' : this.options.replaceRegistryHost @@ -104,14 +112,13 @@ class Arborist extends Base { this.cache = resolve(this.options.cache) this.diff = null this.path = resolve(this.options.path) + this.scriptsRun = new Set() timeEnd() } - // TODO: We should change these to static functions instead - // of methods for the next major version + // TODO: We should change these to static functions instead of methods for the next major version - // Get the actual nodes corresponding to a root node's child workspaces, - // given a list of workspace names. + // Get the actual nodes corresponding to a root node's child workspaces, given a list of workspace names. workspaceNodes (tree, workspaces) { const wsMap = tree.workspaces if (!wsMap) { diff --git a/workspaces/arborist/lib/arborist/isolated-reifier.js b/workspaces/arborist/lib/arborist/isolated-reifier.js index 16210296b5a14..9822645d2527c 100644 --- a/workspaces/arborist/lib/arborist/isolated-reifier.js +++ b/workspaces/arborist/lib/arborist/isolated-reifier.js @@ -1,6 +1,5 @@ const _makeIdealGraph = Symbol('makeIdealGraph') const _createIsolatedTree = Symbol.for('createIsolatedTree') -const _createBundledTree = Symbol('createBundledTree') const { mkdirSync } = require('node:fs') const pacote = require('pacote') const { join } = require('node:path') @@ -162,7 +161,7 @@ module.exports = cls => class IsolatedReifier extends cls { result.hasInstallScript = node.hasInstallScript } - async [_createBundledTree] () { + async #createBundledTree () { // TODO: make sure that idealTree object exists const idealTree = this.idealTree // TODO: test workspaces having bundled deps @@ -217,7 +216,7 @@ module.exports = cls => class IsolatedReifier extends cls { const proxiedIdealTree = this.idealGraph - const bundledTree = await this[_createBundledTree]() + const bundledTree = await this.#createBundledTree() const treeHash = (startNode) => { // generate short hash based on the dependency tree diff --git a/workspaces/arborist/lib/arborist/load-actual.js b/workspaces/arborist/lib/arborist/load-actual.js deleted file mode 100644 index 3be44780e01ae..0000000000000 --- a/workspaces/arborist/lib/arborist/load-actual.js +++ /dev/null @@ -1,442 +0,0 @@ -// mix-in implementing the loadActual method - -const { dirname, join, normalize, relative, resolve } = require('node:path') - -const PackageJson = require('@npmcli/package-json') -const { readdirScoped } = require('@npmcli/fs') -const { walkUp } = require('walk-up-path') -const ancestorPath = require('common-ancestor-path') -const treeCheck = require('../tree-check.js') - -const Shrinkwrap = require('../shrinkwrap.js') -const calcDepFlags = require('../calc-dep-flags.js') -const Node = require('../node.js') -const Link = require('../link.js') -const realpath = require('../realpath.js') - -// public symbols -const _changePath = Symbol.for('_changePath') -const _setWorkspaces = Symbol.for('setWorkspaces') -const _rpcache = Symbol.for('realpathCache') -const _stcache = Symbol.for('statCache') - -module.exports = cls => class ActualLoader extends cls { - #actualTree - // ensure when walking the tree that we don't call loadTree on the same - // actual node more than one time. - #actualTreeLoaded = new Set() - #actualTreePromise - - // cache of nodes when loading the actualTree, so that we avoid loaded the - // same node multiple times when symlinks attack. - #cache = new Map() - #filter - - // cache of link targets for setting fsParent links - // We don't do fsParent as a magic getter/setter, because it'd be too costly - // to keep up to date along the walk. - // And, we know that it can ONLY be relevant when the node is a target of a - // link; otherwise, it'd be in a node_modules folder, so take advantage of - // that to limit the scans later. - #topNodes = new Set() - #transplantFilter - - constructor (options) { - super(options) - - // the tree of nodes on disk - this.actualTree = options.actualTree - - // caches for cached realpath calls - const cwd = process.cwd() - // assume that the cwd is real enough for our purposes - this[_rpcache] = new Map([[cwd, cwd]]) - this[_stcache] = new Map() - } - - // public method - // TODO remove options param in next semver major - async loadActual (options = {}) { - // In the past this.actualTree was set as a promise that eventually - // resolved, and overwrite this.actualTree with the resolved value. This - // was a problem because virtually no other code expects this.actualTree to - // be a promise. Instead we only set it once resolved, and also return it - // from the promise so that it is what's returned from this function when - // awaited. - if (this.actualTree) { - return this.actualTree - } - if (!this.#actualTreePromise) { - // allow the user to set options on the ctor as well. - // XXX: deprecate separate method options objects. - options = { ...this.options, ...options } - - this.#actualTreePromise = this.#loadActual(options) - .then(tree => { - // reset all deps to extraneous prior to recalc - if (!options.root) { - for (const node of tree.inventory.values()) { - node.extraneous = true - } - } - - // only reset root flags if we're not re-rooting, - // otherwise leave as-is - calcDepFlags(tree, !options.root) - this.actualTree = treeCheck(tree) - return this.actualTree - }) - } - return this.#actualTreePromise - } - - // return the promise so that we don't ever have more than one going at the - // same time. This is so that buildIdealTree can default to the actualTree - // if no shrinkwrap present, but reify() can still call buildIdealTree and - // loadActual in parallel safely. - - async #loadActual (options) { - // mostly realpath to throw if the root doesn't exist - const { - global, - filter = () => true, - root = null, - transplantFilter = () => true, - ignoreMissing = false, - forceActual = false, - } = options - this.#filter = filter - this.#transplantFilter = transplantFilter - - if (global) { - const real = await realpath(this.path, this[_rpcache], this[_stcache]) - const params = { - path: this.path, - realpath: real, - pkg: {}, - global, - loadOverrides: true, - } - if (this.path === real) { - this.#actualTree = this.#newNode(params) - } else { - this.#actualTree = await this.#newLink(params) - } - } else { - // not in global mode, hidden lockfile is allowed, load root pkg too - this.#actualTree = await this.#loadFSNode({ - path: this.path, - real: await realpath(this.path, this[_rpcache], this[_stcache]), - loadOverrides: true, - }) - - this.#actualTree.assertRootOverrides() - - // if forceActual is set, don't even try the hidden lockfile - if (!forceActual) { - // Note: hidden lockfile will be rejected if it's not the latest thing - // in the folder, or if any of the entries in the hidden lockfile are - // missing. - const meta = await Shrinkwrap.load({ - path: this.#actualTree.path, - hiddenLockfile: true, - resolveOptions: this.options, - }) - - if (meta.loadedFromDisk) { - this.#actualTree.meta = meta - // have to load on a new Arborist object, so we don't assign - // the virtualTree on this one! Also, the weird reference is because - // we can't easily get a ref to Arborist in this module, without - // creating a circular reference, since this class is a mixin used - // to build up the Arborist class itself. - await new this.constructor({ ...this.options }).loadVirtual({ - root: this.#actualTree, - }) - await this[_setWorkspaces](this.#actualTree) - - this.#transplant(root) - return this.#actualTree - } - } - - const meta = await Shrinkwrap.load({ - path: this.#actualTree.path, - lockfileVersion: this.options.lockfileVersion, - resolveOptions: this.options, - }) - this.#actualTree.meta = meta - } - - await this.#loadFSTree(this.#actualTree) - await this[_setWorkspaces](this.#actualTree) - - // if there are workspace targets without Link nodes created, load - // the targets, so that we know what they are. - if (this.#actualTree.workspaces && this.#actualTree.workspaces.size) { - const promises = [] - for (const path of this.#actualTree.workspaces.values()) { - if (!this.#cache.has(path)) { - // workspace overrides use the root overrides - const p = this.#loadFSNode({ path, root: this.#actualTree, useRootOverrides: true }) - .then(node => this.#loadFSTree(node)) - promises.push(p) - } - } - await Promise.all(promises) - } - - if (!ignoreMissing) { - await this.#findMissingEdges() - } - - // try to find a node that is the parent in a fs tree sense, but not a - // node_modules tree sense, of any link targets. this allows us to - // resolve deps that node will find, but a legacy npm view of the - // world would not have noticed. - for (const path of this.#topNodes) { - const node = this.#cache.get(path) - if (node && !node.parent && !node.fsParent) { - for (const p of walkUp(dirname(path))) { - if (this.#cache.has(p)) { - node.fsParent = this.#cache.get(p) - break - } - } - } - } - - this.#transplant(root) - - if (global) { - // need to depend on the children, or else all of them - // will end up being flagged as extraneous, since the - // global root isn't a "real" project - const tree = this.#actualTree - const actualRoot = tree.isLink ? tree.target : tree - const { dependencies = {} } = actualRoot.package - for (const [name, kid] of actualRoot.children.entries()) { - const def = kid.isLink ? `file:${kid.realpath}` : '*' - dependencies[name] = dependencies[name] || def - } - actualRoot.package = { ...actualRoot.package, dependencies } - } - return this.#actualTree - } - - #transplant (root) { - if (!root || root === this.#actualTree) { - return - } - - this.#actualTree[_changePath](root.path) - for (const node of this.#actualTree.children.values()) { - if (!this.#transplantFilter(node)) { - node.root = null - } - } - - root.replace(this.#actualTree) - for (const node of this.#actualTree.fsChildren) { - node.root = this.#transplantFilter(node) ? root : null - } - - this.#actualTree = root - } - - async #loadFSNode ({ path, parent, real, root, loadOverrides, useRootOverrides }) { - if (!real) { - try { - real = await realpath(path, this[_rpcache], this[_stcache]) - } catch (error) { - // if realpath fails, just provide a dummy error node - return new Node({ - error, - path, - realpath: path, - parent, - root, - loadOverrides, - }) - } - } - - const cached = this.#cache.get(path) - let node - // missing edges get a dummy node, assign the parent and return it - if (cached && !cached.dummy) { - cached.parent = parent - return cached - } else { - const params = { - installLinks: this.installLinks, - legacyPeerDeps: this.legacyPeerDeps, - path, - realpath: real, - parent, - root, - loadOverrides, - } - - try { - const { content: pkg } = await PackageJson.normalize(real) - params.pkg = pkg - if (useRootOverrides && root.overrides) { - params.overrides = root.overrides.getNodeRule({ name: pkg.name, version: pkg.version }) - } - } catch (err) { - if (err.code === 'EJSONPARSE') { - // TODO @npmcli/package-json should be doing this - err.path = join(real, 'package.json') - } - params.error = err - } - - // soldier on if read-package-json raises an error, passing it to the - // Node which will attach it to its errors array (Link passes it along to - // its target node) - if (normalize(path) === real) { - node = this.#newNode(params) - } else { - node = await this.#newLink(params) - } - } - this.#cache.set(path, node) - return node - } - - #newNode (options) { - // check it for an fsParent if it's a tree top. there's a decent chance - // it'll get parented later, making the fsParent scan a no-op, but better - // safe than sorry, since it's cheap. - const { parent, realpath } = options - if (!parent) { - this.#topNodes.add(realpath) - } - return new Node(options) - } - - async #newLink (options) { - const { realpath } = options - this.#topNodes.add(realpath) - const target = this.#cache.get(realpath) - const link = new Link({ ...options, target }) - - if (!target) { - // Link set its target itself in this case - this.#cache.set(realpath, link.target) - // if a link target points at a node outside of the root tree's - // node_modules hierarchy, then load that node as well. - await this.#loadFSTree(link.target) - } - - return link - } - - async #loadFSTree (node) { - const did = this.#actualTreeLoaded - if (!node.isLink && !did.has(node.target.realpath)) { - did.add(node.target.realpath) - await this.#loadFSChildren(node.target) - return Promise.all( - [...node.target.children.entries()] - .filter(([, kid]) => !did.has(kid.realpath)) - .map(([, kid]) => this.#loadFSTree(kid)) - ) - } - } - - // create child nodes for all the entries in node_modules - // and attach them to the node as a parent - async #loadFSChildren (node) { - const nm = resolve(node.realpath, 'node_modules') - try { - const kids = await readdirScoped(nm).then(paths => paths.map(p => p.replace(/\\/g, '/'))) - return Promise.all( - // ignore . dirs and retired scoped package folders - kids.filter(kid => !/^(@[^/]+\/)?\./.test(kid)) - .filter(kid => this.#filter(node, kid)) - .map(kid => this.#loadFSNode({ - parent: node, - path: resolve(nm, kid), - }))) - } catch { - // error in the readdir is not fatal, just means no kids - } - } - - async #findMissingEdges () { - // try to resolve any missing edges by walking up the directory tree, - // checking for the package in each node_modules folder. stop at the - // root directory. - // The tricky move here is that we load a "dummy" node for the folder - // containing the node_modules folder, so that it can be assigned as - // the fsParent. It's a bad idea to *actually* load that full node, - // because people sometimes develop in ~/projects/node_modules/... - // so we'd end up loading a massive tree with lots of unrelated junk. - const nmContents = new Map() - const tree = this.#actualTree - for (const node of tree.inventory.values()) { - const ancestor = ancestorPath(node.realpath, this.path) - - const depPromises = [] - for (const [name, edge] of node.edgesOut.entries()) { - const notMissing = !edge.missing && - !(edge.to && (edge.to.dummy || edge.to.parent !== node)) - if (notMissing) { - continue - } - - // start the walk from the dirname, because we would have found - // the dep in the loadFSTree step already if it was local. - for (const p of walkUp(dirname(node.realpath))) { - // only walk as far as the nearest ancestor - // this keeps us from going into completely unrelated - // places when a project is just missing something, but - // allows for finding the transitive deps of link targets. - // ie, if it has to go up and back out to get to the path - // from the nearest common ancestor, we've gone too far. - if (ancestor && /^\.\.(?:[\\/]|$)/.test(relative(ancestor, p))) { - break - } - - let entries - if (!nmContents.has(p)) { - entries = await readdirScoped(p + '/node_modules') - .catch(() => []).then(paths => paths.map(p => p.replace(/\\/g, '/'))) - nmContents.set(p, entries) - } else { - entries = nmContents.get(p) - } - - if (!entries.includes(name)) { - continue - } - - let d - if (!this.#cache.has(p)) { - d = new Node({ path: p, root: node.root, dummy: true }) - this.#cache.set(p, d) - } else { - d = this.#cache.get(p) - } - if (d.dummy) { - // it's a placeholder, so likely would not have loaded this dep, - // unless another dep in the tree also needs it. - const depPath = normalize(`${p}/node_modules/${name}`) - const cached = this.#cache.get(depPath) - if (!cached || cached.dummy) { - depPromises.push(this.#loadFSNode({ - path: depPath, - root: node.root, - parent: d, - }).then(node => this.#loadFSTree(node))) - } - } - break - } - } - await Promise.all(depPromises) - } - } -} diff --git a/workspaces/arborist/lib/arborist/load-virtual.js b/workspaces/arborist/lib/arborist/load-virtual.js deleted file mode 100644 index ff0583181529b..0000000000000 --- a/workspaces/arborist/lib/arborist/load-virtual.js +++ /dev/null @@ -1,289 +0,0 @@ -const { resolve } = require('node:path') -// mixin providing the loadVirtual method -const mapWorkspaces = require('@npmcli/map-workspaces') -const PackageJson = require('@npmcli/package-json') -const nameFromFolder = require('@npmcli/name-from-folder') - -const consistentResolve = require('../consistent-resolve.js') -const Shrinkwrap = require('../shrinkwrap.js') -const Node = require('../node.js') -const Link = require('../link.js') -const relpath = require('../relpath.js') -const calcDepFlags = require('../calc-dep-flags.js') -const treeCheck = require('../tree-check.js') - -const flagsSuspect = Symbol.for('flagsSuspect') -const setWorkspaces = Symbol.for('setWorkspaces') - -module.exports = cls => class VirtualLoader extends cls { - #rootOptionProvided - - constructor (options) { - super(options) - - // the virtual tree we load from a shrinkwrap - this.virtualTree = options.virtualTree - this[flagsSuspect] = false - } - - // public method - async loadVirtual (options = {}) { - if (this.virtualTree) { - return this.virtualTree - } - - // allow the user to set reify options on the ctor as well. - // XXX: deprecate separate reify() options object. - options = { ...this.options, ...options } - - if (options.root && options.root.meta) { - await this.#loadFromShrinkwrap(options.root.meta, options.root) - return treeCheck(this.virtualTree) - } - - const s = await Shrinkwrap.load({ - path: this.path, - lockfileVersion: this.options.lockfileVersion, - resolveOptions: this.options, - }) - if (!s.loadedFromDisk && !options.root) { - const er = new Error('loadVirtual requires existing shrinkwrap file') - throw Object.assign(er, { code: 'ENOLOCK' }) - } - - // when building the ideal tree, we pass in a root node to this function - // otherwise, load it from the root package json or the lockfile - const pkg = await PackageJson.normalize(this.path).then(p => p.content).catch(() => s.data.packages[''] || {}) - // TODO clean this up - const { - root = await this[setWorkspaces](this.#loadNode('', pkg, true)), - } = options - this.#rootOptionProvided = options.root - - await this.#loadFromShrinkwrap(s, root) - root.assertRootOverrides() - return treeCheck(this.virtualTree) - } - - async #loadFromShrinkwrap (s, root) { - if (!this.#rootOptionProvided) { - // root is never any of these things, but might be a brand new - // baby Node object that never had its dep flags calculated. - root.unsetDepFlags() - } else { - this[flagsSuspect] = true - } - - this.#checkRootEdges(s, root) - root.meta = s - this.virtualTree = root - const { links, nodes } = this.#resolveNodes(s, root) - await this.#resolveLinks(links, nodes) - if (!(s.originalLockfileVersion >= 2)) { - this.#assignBundles(nodes) - } - if (this[flagsSuspect]) { - // reset all dep flags - // can't use inventory here, because virtualTree might not be root - for (const node of nodes.values()) { - if (node.isRoot || node === this.#rootOptionProvided) { - continue - } - node.resetDepFlags() - } - calcDepFlags(this.virtualTree, !this.#rootOptionProvided) - } - return root - } - - // check the lockfile deps, and see if they match. if they do not - // then we have to reset dep flags at the end. for example, if the - // user manually edits their package.json file, then we need to know - // that the idealTree is no longer entirely trustworthy. - #checkRootEdges (s, root) { - // loaded virtually from tree, no chance of being out of sync - // ancient lockfiles are critically damaged by this process, - // so we need to just hope for the best in those cases. - if (!s.loadedFromDisk || s.ancientLockfile) { - return - } - - const lock = s.get('') - const prod = lock.dependencies || {} - const dev = lock.devDependencies || {} - const optional = lock.optionalDependencies || {} - const peer = lock.peerDependencies || {} - const peerOptional = {} - - if (lock.peerDependenciesMeta) { - for (const [name, meta] of Object.entries(lock.peerDependenciesMeta)) { - if (meta.optional && peer[name] !== undefined) { - peerOptional[name] = peer[name] - delete peer[name] - } - } - } - - for (const name of Object.keys(optional)) { - delete prod[name] - } - - const lockWS = {} - const workspaces = mapWorkspaces.virtual({ - cwd: this.path, - lockfile: s.data, - }) - - for (const [name, path] of workspaces.entries()) { - lockWS[name] = `file:${path}` - } - - // Should rootNames exclude optional? - const rootNames = new Set(root.edgesOut.keys()) - - const lockByType = ({ dev, optional, peer, peerOptional, prod, workspace: lockWS }) - - // Find anything in shrinkwrap deps that doesn't match root's type or spec - for (const type in lockByType) { - const deps = lockByType[type] - for (const name in deps) { - const edge = root.edgesOut.get(name) - if (!edge || edge.type !== type || edge.spec !== deps[name]) { - return this[flagsSuspect] = true - } - rootNames.delete(name) - } - } - // Something was in root that's not accounted for in shrinkwrap - if (rootNames.size) { - return this[flagsSuspect] = true - } - } - - // separate out link metadata, and create Node objects for nodes - #resolveNodes (s, root) { - const links = new Map() - const nodes = new Map([['', root]]) - for (const [location, meta] of Object.entries(s.data.packages)) { - // skip the root because we already got it - if (!location) { - continue - } - - if (meta.link) { - links.set(location, meta) - } else { - nodes.set(location, this.#loadNode(location, meta)) - } - } - return { links, nodes } - } - - // links is the set of metadata, and nodes is the map of non-Link nodes - // Set the targets to nodes in the set, if we have them (we might not) - async #resolveLinks (links, nodes) { - for (const [location, meta] of links.entries()) { - const targetPath = resolve(this.path, meta.resolved) - const targetLoc = relpath(this.path, targetPath) - const target = nodes.get(targetLoc) - - if (!target) { - const err = new Error( -`Missing target in lock file: "${targetLoc}" is referenced by "${location}" but does not exist. -To fix: -1. rm package-lock.json -2. npm install` - ) - err.code = 'EMISSINGTARGET' - throw err - } - - const link = this.#loadLink(location, targetLoc, target, meta) - nodes.set(location, link) - nodes.set(targetLoc, link.target) - - // we always need to read the package.json for link targets - // outside node_modules because they can be changed by the local user - if (!link.target.parent) { - await PackageJson.normalize(link.realpath).then(p => link.target.package = p.content).catch(() => null) - } - } - } - - #assignBundles (nodes) { - for (const [location, node] of nodes) { - // Skip assignment of parentage for the root package - if (!location || node.isLink && !node.target.location) { - continue - } - const { name, parent, package: { inBundle } } = node - - if (!parent) { - continue - } - - // read inBundle from package because 'package' here is - // actually a v2 lockfile metadata entry. - // If the *parent* is also bundled, though, or if the parent has - // no dependency on it, then we assume that it's being pulled in - // just by virtue of its parent or a transitive dep being bundled. - const { package: ppkg } = parent - const { inBundle: parentBundled } = ppkg - if (inBundle && !parentBundled && parent.edgesOut.has(node.name)) { - if (!ppkg.bundleDependencies) { - ppkg.bundleDependencies = [name] - } else { - ppkg.bundleDependencies.push(name) - } - } - } - } - - #loadNode (location, sw, loadOverrides) { - const p = this.virtualTree ? this.virtualTree.realpath : this.path - const path = resolve(p, location) - // shrinkwrap doesn't include package name unless necessary - if (!sw.name) { - sw.name = nameFromFolder(path) - } - - const node = new Node({ - installLinks: this.installLinks, - legacyPeerDeps: this.legacyPeerDeps, - root: this.virtualTree, - path, - realpath: path, - integrity: sw.integrity, - resolved: consistentResolve(sw.resolved, this.path, path), - pkg: sw, - hasShrinkwrap: sw.hasShrinkwrap, - loadOverrides, - // cast to boolean because they're undefined in the lock file when false - extraneous: !!sw.extraneous, - devOptional: !!(sw.devOptional || sw.dev || sw.optional), - peer: !!sw.peer, - optional: !!sw.optional, - dev: !!sw.dev, - }) - - return node - } - - #loadLink (location, targetLoc, target) { - const path = resolve(this.path, location) - const link = new Link({ - installLinks: this.installLinks, - legacyPeerDeps: this.legacyPeerDeps, - path, - realpath: resolve(this.path, targetLoc), - target, - pkg: target && target.package, - }) - link.extraneous = target.extraneous - link.devOptional = target.devOptional - link.peer = target.peer - link.optional = target.optional - link.dev = target.dev - return link - } -} diff --git a/workspaces/arborist/lib/arborist/rebuild.js b/workspaces/arborist/lib/arborist/rebuild.js deleted file mode 100644 index 272d6a4122aef..0000000000000 --- a/workspaces/arborist/lib/arborist/rebuild.js +++ /dev/null @@ -1,403 +0,0 @@ -// Arborist.rebuild({path = this.path}) will do all the binlinks and -// bundle building needed. Called by reify, and by `npm rebuild`. - -const PackageJson = require('@npmcli/package-json') -const binLinks = require('bin-links') -const localeCompare = require('@isaacs/string-locale-compare')('en') -const promiseAllRejectLate = require('promise-all-reject-late') -const runScript = require('@npmcli/run-script') -const { callLimit: promiseCallLimit } = require('promise-call-limit') -const { depth: dfwalk } = require('treeverse') -const { isNodeGypPackage, defaultGypInstallScript } = require('@npmcli/node-gyp') -const { log, time } = require('proc-log') -const { resolve } = require('node:path') - -const boolEnv = b => b ? '1' : '' -const sortNodes = (a, b) => (a.depth - b.depth) || localeCompare(a.path, b.path) - -const _checkBins = Symbol.for('checkBins') - -// defined by reify mixin -const _handleOptionalFailure = Symbol.for('handleOptionalFailure') -const _trashList = Symbol.for('trashList') - -module.exports = cls => class Builder extends cls { - #doHandleOptionalFailure - #oldMeta = null - #queues - - constructor (options) { - super(options) - - this.scriptsRun = new Set() - this.#resetQueues() - } - - async rebuild ({ nodes, handleOptionalFailure = false } = {}) { - // nothing to do if we're not building anything! - if (this.options.ignoreScripts && !this.options.binLinks) { - return - } - - // when building for the first time, as part of reify, we ignore - // failures in optional nodes, and just delete them. however, when - // running JUST a rebuild, we treat optional failures as real fails - this.#doHandleOptionalFailure = handleOptionalFailure - - if (!nodes) { - nodes = await this.#loadDefaultNodes() - } - - // separates links nodes so that it can run - // prepare scripts and link bins in the expected order - const timeEnd = time.start('build') - - const { - depNodes, - linkNodes, - } = this.#retrieveNodesByType(nodes) - - // build regular deps - await this.#build(depNodes, {}) - - // build link deps - if (linkNodes.size) { - this.#resetQueues() - await this.#build(linkNodes, { type: 'links' }) - } - - timeEnd() - } - - // if we don't have a set of nodes, then just rebuild - // the actual tree on disk. - async #loadDefaultNodes () { - let nodes - const tree = await this.loadActual() - let filterSet - if (!this.options.workspacesEnabled) { - filterSet = this.excludeWorkspacesDependencySet(tree) - nodes = tree.inventory.filter(node => - filterSet.has(node) || node.isProjectRoot - ) - } else if (this.options.workspaces.length) { - filterSet = this.workspaceDependencySet( - tree, - this.options.workspaces, - this.options.includeWorkspaceRoot - ) - nodes = tree.inventory.filter(node => filterSet.has(node)) - } else { - nodes = tree.inventory.values() - } - return nodes - } - - #retrieveNodesByType (nodes) { - const depNodes = new Set() - const linkNodes = new Set() - const storeNodes = new Set() - - for (const node of nodes) { - if (node.isStoreLink) { - storeNodes.add(node) - } else if (node.isLink) { - linkNodes.add(node) - } else { - depNodes.add(node) - } - } - // Make sure that store linked nodes are processed last. - // We can't process store links separately or else lifecycle scripts on - // standard nodes might not have bin links yet. - for (const node of storeNodes) { - depNodes.add(node) - } - - // deduplicates link nodes and their targets, avoids - // calling lifecycle scripts twice when running `npm rebuild` - // ref: https://github.com/npm/cli/issues/2905 - // - // we avoid doing so if global=true since `bin-links` relies - // on having the target nodes available in global mode. - if (!this.options.global) { - for (const node of linkNodes) { - depNodes.delete(node.target) - } - } - - return { - depNodes, - linkNodes, - } - } - - #resetQueues () { - this.#queues = { - preinstall: [], - install: [], - postinstall: [], - prepare: [], - bin: [], - } - } - - async #build (nodes, { type = 'deps' }) { - const timeEnd = time.start(`build:${type}`) - - await this.#buildQueues(nodes) - - if (!this.options.ignoreScripts) { - await this.#runScripts('preinstall') - } - - // links should run prepare scripts and only link bins after that - if (type === 'links') { - if (!this.options.ignoreScripts) { - await this.#runScripts('prepare') - } - } - if (this.options.binLinks) { - await this.#linkAllBins() - } - - if (!this.options.ignoreScripts) { - await this.#runScripts('install') - await this.#runScripts('postinstall') - } - - timeEnd() - } - - async #buildQueues (nodes) { - const timeEnd = time.start('build:queue') - const set = new Set() - - const promises = [] - for (const node of nodes) { - promises.push(this.#addToBuildSet(node, set)) - - // if it has bundle deps, add those too, if rebuildBundle - if (this.options.rebuildBundle !== false) { - const bd = node.package.bundleDependencies - if (bd && bd.length) { - dfwalk({ - tree: node, - leave: node => promises.push(this.#addToBuildSet(node, set)), - getChildren: node => [...node.children.values()], - filter: node => node.inBundle, - }) - } - } - } - await promiseAllRejectLate(promises) - - // now sort into the queues for the 4 things we have to do - // run in the same predictable order that buildIdealTree uses - // there's no particular reason for doing it in this order rather - // than another, but sorting *somehow* makes it consistent. - const queue = [...set].sort(sortNodes) - - for (const node of queue) { - const { package: { bin, scripts = {} } } = node.target - const { preinstall, install, postinstall, prepare } = scripts - const tests = { bin, preinstall, install, postinstall, prepare } - for (const [key, has] of Object.entries(tests)) { - if (has) { - this.#queues[key].push(node) - } - } - } - timeEnd() - } - - async [_checkBins] (node) { - // if the node is a global top, and we're not in force mode, then - // any existing bins need to either be missing, or a symlink into - // the node path. Otherwise a package can have a preinstall script - // that unlinks something, to allow them to silently overwrite system - // binaries, which is unsafe and insecure. - if (!node.globalTop || this.options.force) { - return - } - const { path, package: pkg } = node - await binLinks.checkBins({ pkg, path, top: true, global: true }) - } - - async #addToBuildSet (node, set, refreshed = false) { - if (set.has(node)) { - return - } - - if (this.#oldMeta === null) { - const { root: { meta } } = node - this.#oldMeta = meta && meta.loadedFromDisk && - !(meta.originalLockfileVersion >= 2) - } - - const { package: pkg, hasInstallScript } = node.target - const { gypfile, bin, scripts = {} } = pkg - - const { preinstall, install, postinstall, prepare } = scripts - const anyScript = preinstall || install || postinstall || prepare - if (!refreshed && !anyScript && (hasInstallScript || this.#oldMeta)) { - // we either have an old metadata (and thus might have scripts) - // or we have an indication that there's install scripts (but - // don't yet know what they are) so we have to load the package.json - // from disk to see what the deal is. Failure here just means - // no scripts to add, probably borked package.json. - // add to the set then remove while we're reading the pj, so we - // don't accidentally hit it multiple times. - set.add(node) - const { content: pkg } = await PackageJson.normalize(node.path).catch(() => { - return { content: {} } - }) - set.delete(node) - - const { scripts = {} } = pkg - node.package.scripts = scripts - return this.#addToBuildSet(node, set, true) - } - - // Rebuild node-gyp dependencies lacking an install or preinstall script - // note that 'scripts' might be missing entirely, and the package may - // set gypfile:false to avoid this automatic detection. - const isGyp = gypfile !== false && - !install && - !preinstall && - await isNodeGypPackage(node.path) - - if (bin || preinstall || install || postinstall || prepare || isGyp) { - if (bin) { - await this[_checkBins](node) - } - if (isGyp) { - scripts.install = defaultGypInstallScript - node.package.scripts = scripts - } - set.add(node) - } - } - - async #runScripts (event) { - const queue = this.#queues[event] - - if (!queue.length) { - return - } - - const timeEnd = time.start(`build:run:${event}`) - const stdio = this.options.foregroundScripts ? 'inherit' : 'pipe' - const limit = this.options.foregroundScripts ? 1 : undefined - await promiseCallLimit(queue.map(node => async () => { - const { - path, - integrity, - resolved, - optional, - peer, - dev, - devOptional, - package: pkg, - location, - isStoreLink, - } = node.target - - // skip any that we know we'll be deleting - // or storeLinks - if (this[_trashList].has(path) || isStoreLink) { - return - } - - const timeEndLocation = time.start(`build:run:${event}:${location}`) - log.info('run', pkg._id, event, location, pkg.scripts[event]) - const env = { - npm_package_resolved: resolved, - npm_package_integrity: integrity, - npm_package_json: resolve(path, 'package.json'), - npm_package_optional: boolEnv(optional), - npm_package_dev: boolEnv(dev), - npm_package_peer: boolEnv(peer), - npm_package_dev_optional: - boolEnv(devOptional && !dev && !optional), - } - const runOpts = { - event, - path, - pkg, - stdio, - env, - scriptShell: this.options.scriptShell, - } - const p = runScript(runOpts).catch(er => { - const { code, signal } = er - log.info('run', pkg._id, event, { code, signal }) - throw er - }).then(({ args, code, signal, stdout, stderr }) => { - this.scriptsRun.add({ - pkg, - path, - event, - // I do not know why this needs to be on THIS line but refactoring - // this function would be quite a process - // eslint-disable-next-line promise/always-return - cmd: args && args[args.length - 1], - env, - code, - signal, - stdout, - stderr, - }) - log.info('run', pkg._id, event, { code, signal }) - }) - - await (this.#doHandleOptionalFailure - ? this[_handleOptionalFailure](node, p) - : p) - - timeEndLocation() - }), { limit }) - timeEnd() - } - - async #linkAllBins () { - const queue = this.#queues.bin - if (!queue.length) { - return - } - - const timeEnd = time.start('build:link') - const promises = [] - // sort the queue by node path, so that the module-local collision - // detector in bin-links will always resolve the same way. - for (const node of queue.sort(sortNodes)) { - // TODO these run before they're awaited - promises.push(this.#createBinLinks(node)) - } - - await promiseAllRejectLate(promises) - timeEnd() - } - - async #createBinLinks (node) { - if (this[_trashList].has(node.path)) { - return - } - - const timeEnd = time.start(`build:link:${node.location}`) - - const p = binLinks({ - pkg: node.package, - path: node.path, - top: !!(node.isTop || node.globalTop), - force: this.options.force, - global: !!node.globalTop, - }) - - await (this.#doHandleOptionalFailure - ? this[_handleOptionalFailure](node, p) - : p) - - timeEnd() - } -} diff --git a/workspaces/arborist/lib/arborist/reify.js b/workspaces/arborist/lib/arborist/reify.js index 70d4d9796d2e7..f93a9ecb7d590 100644 --- a/workspaces/arborist/lib/arborist/reify.js +++ b/workspaces/arborist/lib/arborist/reify.js @@ -1,4 +1,4 @@ -// mixin implementing the reify method +// mixin implementing the reify and rebuild methods const PackageJson = require('@npmcli/package-json') const hgi = require('hosted-git-info') const npa = require('npm-package-arg') @@ -14,6 +14,9 @@ const { lstat, mkdir, rm, symlink } = require('node:fs/promises') const { moveFile } = require('@npmcli/fs') const { subset, intersects } = require('semver') const { walkUp } = require('walk-up-path') +const binLinks = require('bin-links') +const localeCompare = require('@isaacs/string-locale-compare')('en') +const { isNodeGypPackage, defaultGypInstallScript } = require('@npmcli/node-gyp') const AuditReport = require('../audit-report.js') const Diff = require('../diff.js') @@ -34,11 +37,8 @@ const _submitQuickAudit = Symbol('submitQuickAudit') const _unpackNewModules = Symbol.for('unpackNewModules') const _build = Symbol.for('build') -// shared by rebuild mixin -const _trashList = Symbol.for('trashList') const _handleOptionalFailure = Symbol.for('handleOptionalFailure') const _loadTrees = Symbol.for('loadTrees') -// defined by rebuild mixin const _checkBins = Symbol.for('checkBins') // shared symbols for swapping out when testing @@ -57,33 +57,35 @@ const _rollbackRetireShallowNodes = Symbol.for('rollbackRetireShallowNodes') const _rollbackCreateSparseTree = Symbol.for('rollbackCreateSparseTree') const _rollbackMoveBackRetiredUnchanged = Symbol.for('rollbackMoveBackRetiredUnchanged') const _saveIdealTree = Symbol.for('saveIdealTree') -const _reifyPackages = Symbol.for('reifyPackages') // defined by build-ideal-tree mixin const _resolvedAdd = Symbol.for('resolvedAdd') -const _usePackageLock = Symbol.for('usePackageLock') // used by build-ideal-tree mixin const _addNodeToTrashList = Symbol.for('addNodeToTrashList') const _createIsolatedTree = Symbol.for('createIsolatedTree') +const boolEnv = b => b ? '1' : '' +const sortNodes = (a, b) => (a.depth - b.depth) || localeCompare(a.path, b.path) + module.exports = cls => class Reifier extends cls { #bundleMissing = new Set() // child nodes we'd EXPECT to be included in a bundle, but aren't #bundleUnpacked = new Set() // the nodes we unpack to read their bundles - #dryRun + #doHandleOptionalFailure #nmValidated = new Set() + #oldMeta = null #omit + #queues #retiredPaths = {} #retiredUnchanged = {} - #savePrefix #shrinkwrapInflated = new Set() #sparseTreeDirs = new Set() #sparseTreeRoots = new Set() + #trashList = new Set() constructor (options) { super(options) - - this[_trashList] = new Set() + this.#resetQueues() } // public method @@ -122,7 +124,7 @@ module.exports = cls => class Reifier extends cls { this.idealTree = await this[_createIsolatedTree]() } await this[_diffTrees]() - await this[_reifyPackages]() + await this.#reifyPackages() if (linked) { // swap back in the idealTree // so that the lockfile is preserved @@ -136,7 +138,7 @@ module.exports = cls => class Reifier extends cls { } } // clean up any trash that is still in the tree - for (const path of this[_trashList]) { + for (const path of this.#trashList) { const loc = relpath(this.idealTree.realpath, path) const node = this.idealTree.inventory.get(loc) if (node && node.root === this.idealTree) { @@ -261,7 +263,7 @@ module.exports = cls => class Reifier extends cls { return treeCheck(this.actualTree) } - async [_reifyPackages] () { + async #reifyPackages () { // we don't submit the audit report or write to disk on dry runs if (this.options.dryRun) { return @@ -480,9 +482,9 @@ module.exports = cls => class Reifier extends cls { if (retire) { const retired = retirePath(path) moves[path] = retired - this[_trashList].add(retired) + this.#trashList.add(retired) } else { - this[_trashList].add(path) + this.#trashList.add(path) } } } @@ -568,7 +570,7 @@ module.exports = cls => class Reifier extends cls { if (st && !st.isDirectory()) { const retired = retirePath(d) this.#retiredPaths[d] = retired - this[_trashList].add(retired) + this.#trashList.add(retired) await this[_renamePath](d, retired) } } @@ -610,7 +612,7 @@ module.exports = cls => class Reifier extends cls { const shrinkwraps = this.diff.leaves .filter(d => (d.action === 'CHANGE' || d.action === 'ADD' || !d.action) && d.ideal.hasShrinkwrap && !seen.has(d.ideal) && - !this[_trashList].has(d.ideal.path)) + !this.#trashList.has(d.ideal.path)) if (!shrinkwraps.length) { return @@ -754,8 +756,7 @@ module.exports = cls => class Reifier extends cls { return symlink(rel, node.path, 'junction') } - // if the node is optional, then the failure of the promise is nonfatal - // just add it and its optional set to the trash list. + // if the node is optional, then the failure of the promise is nonfatal just add it and its optional set to the trash list. [_handleOptionalFailure] (node, p) { return (node.optional ? p.catch(() => { const set = optionalSet(node) @@ -889,7 +890,7 @@ module.exports = cls => class Reifier extends cls { const set = (bundlesByDepth.get(depth) || []) .filter(node => node.root === this.idealTree && node.target !== node.root && - !this[_trashList].has(node.path)) + !this.#trashList.has(node.path)) if (!set.length) { return this[_loadBundlesAndUpdateTrees](depth + 1, bundlesByDepth) @@ -1231,7 +1232,7 @@ module.exports = cls => class Reifier extends cls { const failures = [] const _rm = path => rm(path, { recursive: true, force: true }).catch(er => failures.push([path, er])) - for (const path of this[_trashList]) { + for (const path of this.#trashList) { promises.push(_rm(path)) } @@ -1496,7 +1497,7 @@ module.exports = cls => class Reifier extends cls { // before now edge specs could be changing, affecting the `requires` field // in the package lock, so we hold off saving to the very last action - if (this[_usePackageLock]) { + if (this.options.usePackageLock) { // preserve indentation, if possible let format = this.idealTree.package[Symbol.for('indent')] if (format === undefined) { @@ -1513,4 +1514,373 @@ module.exports = cls => class Reifier extends cls { timeEnd() return true } + + // Arborist.rebuild({path = this.path}) will do all the binlinks and bundle building needed. Called by reify, and by `npm rebuild`. + async rebuild ({ nodes, handleOptionalFailure = false } = {}) { + // nothing to do if we're not building anything! + if (this.options.ignoreScripts && !this.options.binLinks) { + return + } + + // when building for the first time, as part of reify, we ignore + // failures in optional nodes, and just delete them. however, when + // running JUST a rebuild, we treat optional failures as real fails + this.#doHandleOptionalFailure = handleOptionalFailure + + if (!nodes) { + nodes = await this.#loadDefaultNodes() + } + + // separates links nodes so that it can run + // prepare scripts and link bins in the expected order + const timeEnd = time.start('build') + + const { + depNodes, + linkNodes, + } = this.#retrieveNodesByType(nodes) + + // build regular deps + await this.#build(depNodes, {}) + + // build link deps + if (linkNodes.size) { + this.#resetQueues() + await this.#build(linkNodes, { type: 'links' }) + } + + timeEnd() + } + + // if we don't have a set of nodes, then just rebuild + // the actual tree on disk. + async #loadDefaultNodes () { + let nodes + const tree = await this.loadActual() + let filterSet + if (!this.options.workspacesEnabled) { + filterSet = this.excludeWorkspacesDependencySet(tree) + nodes = tree.inventory.filter(node => + filterSet.has(node) || node.isProjectRoot + ) + } else if (this.options.workspaces.length) { + filterSet = this.workspaceDependencySet( + tree, + this.options.workspaces, + this.options.includeWorkspaceRoot + ) + nodes = tree.inventory.filter(node => filterSet.has(node)) + } else { + nodes = tree.inventory.values() + } + return nodes + } + + #retrieveNodesByType (nodes) { + const depNodes = new Set() + const linkNodes = new Set() + const storeNodes = new Set() + + for (const node of nodes) { + if (node.isStoreLink) { + storeNodes.add(node) + } else if (node.isLink) { + linkNodes.add(node) + } else { + depNodes.add(node) + } + } + // Make sure that store linked nodes are processed last. + // We can't process store links separately or else lifecycle scripts on + // standard nodes might not have bin links yet. + for (const node of storeNodes) { + depNodes.add(node) + } + + // deduplicates link nodes and their targets, avoids + // calling lifecycle scripts twice when running `npm rebuild` + // ref: https://github.com/npm/cli/issues/2905 + // + // we avoid doing so if global=true since `bin-links` relies + // on having the target nodes available in global mode. + if (!this.options.global) { + for (const node of linkNodes) { + depNodes.delete(node.target) + } + } + + return { + depNodes, + linkNodes, + } + } + + #resetQueues () { + this.#queues = { + preinstall: [], + install: [], + postinstall: [], + prepare: [], + bin: [], + } + } + + async #build (nodes, { type = 'deps' }) { + const timeEnd = time.start(`build:${type}`) + + await this.#buildQueues(nodes) + + if (!this.options.ignoreScripts) { + await this.#runScripts('preinstall') + } + + // links should run prepare scripts and only link bins after that + if (type === 'links') { + if (!this.options.ignoreScripts) { + await this.#runScripts('prepare') + } + } + if (this.options.binLinks) { + await this.#linkAllBins() + } + + if (!this.options.ignoreScripts) { + await this.#runScripts('install') + await this.#runScripts('postinstall') + } + + timeEnd() + } + + async #buildQueues (nodes) { + const timeEnd = time.start('build:queue') + const set = new Set() + + const promises = [] + for (const node of nodes) { + promises.push(this.#addToBuildSet(node, set)) + + // if it has bundle deps, add those too, if rebuildBundle + if (this.options.rebuildBundle !== false) { + const bd = node.package.bundleDependencies + if (bd && bd.length) { + dfwalk({ + tree: node, + leave: node => promises.push(this.#addToBuildSet(node, set)), + getChildren: node => [...node.children.values()], + filter: node => node.inBundle, + }) + } + } + } + await promiseAllRejectLate(promises) + + // now sort into the queues for the 4 things we have to do + // run in the same predictable order that buildIdealTree uses + // there's no particular reason for doing it in this order rather + // than another, but sorting *somehow* makes it consistent. + const queue = [...set].sort(sortNodes) + + for (const node of queue) { + const { package: { bin, scripts = {} } } = node.target + const { preinstall, install, postinstall, prepare } = scripts + const tests = { bin, preinstall, install, postinstall, prepare } + for (const [key, has] of Object.entries(tests)) { + if (has) { + this.#queues[key].push(node) + } + } + } + timeEnd() + } + + async [_checkBins] (node) { + // if the node is a global top, and we're not in force mode, then + // any existing bins need to either be missing, or a symlink into + // the node path. Otherwise a package can have a preinstall script + // that unlinks something, to allow them to silently overwrite system + // binaries, which is unsafe and insecure. + if (!node.globalTop || this.options.force) { + return + } + const { path, package: pkg } = node + await binLinks.checkBins({ pkg, path, top: true, global: true }) + } + + async #addToBuildSet (node, set, refreshed = false) { + if (set.has(node)) { + return + } + + if (this.#oldMeta === null) { + const { root: { meta } } = node + this.#oldMeta = meta && meta.loadedFromDisk && + !(meta.originalLockfileVersion >= 2) + } + + const { package: pkg, hasInstallScript } = node.target + const { gypfile, bin, scripts = {} } = pkg + + const { preinstall, install, postinstall, prepare } = scripts + const anyScript = preinstall || install || postinstall || prepare + if (!refreshed && !anyScript && (hasInstallScript || this.#oldMeta)) { + // we either have an old metadata (and thus might have scripts) + // or we have an indication that there's install scripts (but + // don't yet know what they are) so we have to load the package.json + // from disk to see what the deal is. Failure here just means + // no scripts to add, probably borked package.json. + // add to the set then remove while we're reading the pj, so we + // don't accidentally hit it multiple times. + set.add(node) + const { content: pkg } = await PackageJson.normalize(node.path).catch(() => { + return { content: {} } + }) + set.delete(node) + + const { scripts = {} } = pkg + node.package.scripts = scripts + return this.#addToBuildSet(node, set, true) + } + + // Rebuild node-gyp dependencies lacking an install or preinstall script + // note that 'scripts' might be missing entirely, and the package may + // set gypfile:false to avoid this automatic detection. + const isGyp = gypfile !== false && + !install && + !preinstall && + await isNodeGypPackage(node.path) + + if (bin || preinstall || install || postinstall || prepare || isGyp) { + if (bin) { + await this[_checkBins](node) + } + if (isGyp) { + scripts.install = defaultGypInstallScript + node.package.scripts = scripts + } + set.add(node) + } + } + + async #runScripts (event) { + const queue = this.#queues[event] + + if (!queue.length) { + return + } + + const timeEnd = time.start(`build:run:${event}`) + const stdio = this.options.foregroundScripts ? 'inherit' : 'pipe' + const limit = this.options.foregroundScripts ? 1 : undefined + await promiseCallLimit(queue.map(node => async () => { + const { + path, + integrity, + resolved, + optional, + peer, + dev, + devOptional, + package: pkg, + location, + isStoreLink, + } = node.target + + // skip any that we know we'll be deleting + // or storeLinks + if (this.#trashList.has(path) || isStoreLink) { + return + } + + const timeEndLocation = time.start(`build:run:${event}:${location}`) + log.info('run', pkg._id, event, location, pkg.scripts[event]) + const env = { + npm_package_resolved: resolved, + npm_package_integrity: integrity, + npm_package_json: resolve(path, 'package.json'), + npm_package_optional: boolEnv(optional), + npm_package_dev: boolEnv(dev), + npm_package_peer: boolEnv(peer), + npm_package_dev_optional: + boolEnv(devOptional && !dev && !optional), + } + const runOpts = { + event, + path, + pkg, + stdio, + env, + scriptShell: this.options.scriptShell, + } + const p = runScript(runOpts).catch(er => { + const { code, signal } = er + log.info('run', pkg._id, event, { code, signal }) + throw er + }).then(({ args, code, signal, stdout, stderr }) => { + this.scriptsRun.add({ + pkg, + path, + event, + // I do not know why this needs to be on THIS line but refactoring + // this function would be quite a process + // eslint-disable-next-line promise/always-return + cmd: args && args[args.length - 1], + env, + code, + signal, + stdout, + stderr, + }) + log.info('run', pkg._id, event, { code, signal }) + }) + + await (this.#doHandleOptionalFailure + ? this[_handleOptionalFailure](node, p) + : p) + + timeEndLocation() + }), { limit }) + timeEnd() + } + + async #linkAllBins () { + const queue = this.#queues.bin + if (!queue.length) { + return + } + + const timeEnd = time.start('build:link') + const promises = [] + // sort the queue by node path, so that the module-local collision + // detector in bin-links will always resolve the same way. + for (const node of queue.sort(sortNodes)) { + // TODO these run before they're awaited + promises.push(this.#createBinLinks(node)) + } + + await promiseAllRejectLate(promises) + timeEnd() + } + + async #createBinLinks (node) { + if (this.#trashList.has(node.path)) { + return + } + + const timeEnd = time.start(`build:link:${node.location}`) + + const p = binLinks({ + pkg: node.package, + path: node.path, + top: !!(node.isTop || node.globalTop), + force: this.options.force, + global: !!node.globalTop, + }) + + await (this.#doHandleOptionalFailure + ? this[_handleOptionalFailure](node, p) + : p) + + timeEnd() + } } diff --git a/workspaces/arborist/lib/optional-set.js b/workspaces/arborist/lib/optional-set.js index 76d557c0e52c5..021a0ef72aa17 100644 --- a/workspaces/arborist/lib/optional-set.js +++ b/workspaces/arborist/lib/optional-set.js @@ -10,10 +10,6 @@ const gatherDepSet = require('./gather-dep-set.js') const optionalSet = node => { - if (!node.optional) { - return new Set() - } - // start with the node, then walk up the dependency graph until we // get to the boundaries that define the optional set. since the // node is optional, we know that all paths INTO this area of the diff --git a/workspaces/arborist/test/optional-set.js b/workspaces/arborist/test/optional-set.js index f28564554a71b..ff11ea3b4818c 100644 --- a/workspaces/arborist/test/optional-set.js +++ b/workspaces/arborist/test/optional-set.js @@ -75,9 +75,6 @@ t.equal(setJ.has(nodeJ), true, 'gathering from j includes j') t.equal(setJ.has(nodeI), true, 'gathering from j includes i') t.equal(setJ.size, 2, 'two nodes in j set') -const setA = optionalSet(nodeA) -t.equal(setA.size, 0, 'gathering from a is empty set') - const setO = optionalSet(nodeO) t.equal(setO.size, 3, 'three nodes in o set') t.equal(setO.has(nodeO), true, 'set o includes o')