From 3f2e2da3866c4e5f26ca0c0d6721cb371c5a3933 Mon Sep 17 00:00:00 2001 From: isaacs Date: Wed, 4 Aug 2021 15:48:21 -0700 Subject: [PATCH] fix: normalize paths on Windows systems This change uses / as the One True Path Separator, as the gods of POSIX intended in their divine wisdom. On windows, \ characters are converted to /, everywhere and in depth. However, on posix systems, \ is a valid filename character, and is not treated specially. So, instead of splitting on `/[/\\]/`, we can now just split on `'/'` to get a set of path parts. This does mean that archives with entries containing \ will extract differently on Windows systems than on correct systems. However, this is also the behavior of both bsdtar and gnutar, so it seems appropriate to follow suit. Additionally, dirCache pruning is now done case-insensitively. On case-sensitive systems, this potentially results in a few extra lstat calls. However, on case-insensitive systems, it prevents incorrect cache hits. --- lib/mkdir.js | 41 ++++++++++++++----------- lib/normalize-windows-path.js | 8 +++++ lib/pack.js | 7 +++-- lib/path-reservations.js | 6 ++-- lib/unpack.js | 56 +++++++++++++++++----------------- lib/write-entry.js | 10 +++--- package-lock.json | 15 +++++++++ package.json | 1 + test/normalize-windows-path.js | 27 ++++++++++++++++ 9 files changed, 116 insertions(+), 55 deletions(-) create mode 100644 lib/normalize-windows-path.js create mode 100644 test/normalize-windows-path.js diff --git a/lib/mkdir.js b/lib/mkdir.js index c6a154c2..29aa3be1 100644 --- a/lib/mkdir.js +++ b/lib/mkdir.js @@ -8,6 +8,7 @@ const mkdirp = require('mkdirp') const fs = require('fs') const path = require('path') const chownr = require('chownr') +const normPath = require('./normalize-windows-path.js') class SymlinkError extends Error { constructor (symlink, path) { @@ -33,7 +34,11 @@ class CwdError extends Error { } } -const mkdir = module.exports = (dir, opt, cb) => { +const cGet = (cache, key) => cache.get(normPath(key)) +const cSet = (cache, key, val) => cache.set(normPath(key), val) + +module.exports = (dir, opt, cb) => { + dir = normPath(dir) // if there's any overlap between mask and mode, // then we'll need an explicit chmod const umask = opt.umask @@ -49,13 +54,13 @@ const mkdir = module.exports = (dir, opt, cb) => { const preserve = opt.preserve const unlink = opt.unlink const cache = opt.cache - const cwd = opt.cwd + const cwd = normPath(opt.cwd) const done = (er, created) => { if (er) cb(er) else { - cache.set(dir, true) + cSet(cache, dir, true) if (created && doChown) chownr(created, uid, gid, er => done(er)) else if (needChmod) @@ -65,7 +70,7 @@ const mkdir = module.exports = (dir, opt, cb) => { } } - if (cache && cache.get(dir) === true) + if (cache && cGet(cache, dir) === true) return done() if (dir === cwd) @@ -79,7 +84,7 @@ const mkdir = module.exports = (dir, opt, cb) => { return mkdirp(dir, mode, done) const sub = path.relative(cwd, dir) - const parts = sub.split(/\/|\\/) + const parts = sub.split('/') mkdir_(cwd, parts, mode, cache, unlink, cwd, null, done) } @@ -88,7 +93,7 @@ const mkdir_ = (base, parts, mode, cache, unlink, cwd, created, cb) => { return cb(null, created) const p = parts.shift() const part = base + '/' + p - if (cache.get(part)) + if (cGet(cache, part)) return mkdir_(part, parts, mode, cache, unlink, cwd, created, cb) fs.mkdir(part, mode, onmkdir(part, parts, mode, cache, unlink, cwd, created, cb)) } @@ -121,7 +126,8 @@ const onmkdir = (part, parts, mode, cache, unlink, cwd, created, cb) => er => { } } -const mkdirSync = module.exports.sync = (dir, opt) => { +module.exports.sync = (dir, opt) => { + dir = normPath(dir) // if there's any overlap between mask and mode, // then we'll need an explicit chmod const umask = opt.umask @@ -137,17 +143,17 @@ const mkdirSync = module.exports.sync = (dir, opt) => { const preserve = opt.preserve const unlink = opt.unlink const cache = opt.cache - const cwd = opt.cwd + const cwd = normPath(opt.cwd) const done = (created) => { - cache.set(dir, true) + cSet(cache, dir, true) if (created && doChown) chownr.sync(created, uid, gid) if (needChmod) fs.chmodSync(dir, mode) } - if (cache && cache.get(dir) === true) + if (cache && cGet(cache, dir) === true) return done() if (dir === cwd) { @@ -169,19 +175,18 @@ const mkdirSync = module.exports.sync = (dir, opt) => { return done(mkdirp.sync(dir, mode)) const sub = path.relative(cwd, dir) - const parts = sub.split(/\/|\\/) + const parts = sub.split('/') let created = null for (let p = parts.shift(), part = cwd; - p && (part += '/' + p); - p = parts.shift()) { - - if (cache.get(part)) + p && (part += '/' + p); + p = parts.shift()) { + if (cGet(cache, part)) continue try { fs.mkdirSync(part, mode) created = created || part - cache.set(part, true) + cSet(cache, part, true) } catch (er) { if (er.path && path.dirname(er.path) === cwd && (er.code === 'ENOTDIR' || er.code === 'ENOENT')) @@ -189,13 +194,13 @@ const mkdirSync = module.exports.sync = (dir, opt) => { const st = fs.lstatSync(part) if (st.isDirectory()) { - cache.set(part, true) + cSet(cache, part, true) continue } else if (unlink) { fs.unlinkSync(part) fs.mkdirSync(part, mode) created = created || part - cache.set(part, true) + cSet(cache, part, true) continue } else if (st.isSymbolicLink()) return new SymlinkError(part, part + '/' + parts.join('/')) diff --git a/lib/normalize-windows-path.js b/lib/normalize-windows-path.js new file mode 100644 index 00000000..8e3c30a0 --- /dev/null +++ b/lib/normalize-windows-path.js @@ -0,0 +1,8 @@ +// on windows, either \ or / are valid directory separators. +// on unix, \ is a valid character in filenames. +// so, on windows, and only on windows, we replace all \ chars with /, +// so that we can use / as our one and only directory separator char. + +const platform = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform +module.exports = platform !== 'win32' ? p => p + : p => p.replace(/\\/g, '/') diff --git a/lib/pack.js b/lib/pack.js index 80624bbc..102a3d51 100644 --- a/lib/pack.js +++ b/lib/pack.js @@ -56,6 +56,7 @@ const ONDRAIN = Symbol('ondrain') const fs = require('fs') const path = require('path') const warner = require('./warn-mixin.js') +const normPath = require('./normalize-windows-path.js') const Pack = warner(class Pack extends MiniPass { constructor (opt) { @@ -67,7 +68,7 @@ const Pack = warner(class Pack extends MiniPass { this.preservePaths = !!opt.preservePaths this.strict = !!opt.strict this.noPax = !!opt.noPax - this.prefix = (opt.prefix || '').replace(/(\\|\/)+$/, '') + this.prefix = normPath(opt.prefix || '') this.linkCache = opt.linkCache || new Map() this.statCache = opt.statCache || new Map() this.readdirCache = opt.readdirCache || new Map() @@ -132,7 +133,7 @@ const Pack = warner(class Pack extends MiniPass { } [ADDTARENTRY] (p) { - const absolute = path.resolve(this.cwd, p.path) + const absolute = normPath(path.resolve(this.cwd, p.path)) // in this case, we don't have to wait for the stat if (!this.filter(p.path, p)) p.resume() @@ -148,7 +149,7 @@ const Pack = warner(class Pack extends MiniPass { } [ADDFSENTRY] (p) { - const absolute = path.resolve(this.cwd, p) + const absolute = normPath(path.resolve(this.cwd, p)) this[QUEUE].push(new PackJob(p, absolute)) this[PROCESS]() } diff --git a/lib/path-reservations.js b/lib/path-reservations.js index 3cf0c2c1..7ea03829 100644 --- a/lib/path-reservations.js +++ b/lib/path-reservations.js @@ -7,6 +7,7 @@ // while still allowing maximal safe parallelization. const assert = require('assert') +const normPath = require('./normalize-windows-path.js') module.exports = () => { // path => [function or Set] @@ -20,8 +21,9 @@ module.exports = () => { // return a set of parent dirs for a given path const { join } = require('path') const getDirs = path => - join(path).split(/[\\\/]/).slice(0, -1).reduce((set, path) => - set.length ? set.concat(join(set[set.length-1], path)) : [path], []) + normPath(join(path)).split('/').slice(0, -1).reduce((set, path) => + set.length ? set.concat(normPath(join(set[set.length-1], path))) + : [path], []) // functions currently running const running = new Set() diff --git a/lib/unpack.js b/lib/unpack.js index bae677d2..6ba11a21 100644 --- a/lib/unpack.js +++ b/lib/unpack.js @@ -94,6 +94,17 @@ const uint32 = (a, b, c) => : b === b >>> 0 ? b : c +const pruneCache = (cache, abs) => { + // clear the cache if it's a case-insensitive match, since we can't + // know if the current file system is case-sensitive or not. + abs = normPath(abs).toLowerCase() + for (const path of cache.keys()) { + const plower = path.toLowerCase() + if (plower === abs || plower.toLowerCase().indexOf(abs + '/') === 0) + cache.delete(path) + } +} + class Unpack extends Parser { constructor (opt) { if (!opt) @@ -170,7 +181,7 @@ class Unpack extends Parser { // links, and removes symlink directories rather than erroring this.unlink = !!opt.unlink - this.cwd = path.resolve(opt.cwd || process.cwd()) + this.cwd = normPath(path.resolve(opt.cwd || process.cwd())) this.strip = +opt.strip || 0 this.processUmask = process.umask() this.umask = typeof opt.umask === 'number' ? opt.umask : this.processUmask @@ -191,7 +202,7 @@ class Unpack extends Parser { [CHECKPATH] (entry) { if (this.strip) { - const parts = entry.path.split(/\/|\\/) + const parts = normPath(entry.path).split('/') if (parts.length < this.strip) return false entry.path = parts.slice(this.strip).join('/') @@ -199,16 +210,16 @@ class Unpack extends Parser { return false if (entry.type === 'Link') { - const linkparts = entry.linkpath.split(/\/|\\/) + const linkparts = normPath(entry.linkpath).split('/') if (linkparts.length >= this.strip) entry.linkpath = linkparts.slice(this.strip).join('/') } } if (!this.preservePaths) { - const p = entry.path - if (p.match(/(^|\/|\\)\.\.(\\|\/|$)/)) { - this.warn('path contains \'..\'', p) + const p = normPath(entry.path) + if (p.split('/').includes('..')) { + this.warn(`path contains '..'`, p) return false } @@ -229,9 +240,9 @@ class Unpack extends Parser { } if (path.isAbsolute(entry.path)) - entry.absolute = entry.path + entry.absolute = normPath(entry.path) else - entry.absolute = path.resolve(this.cwd, entry.path) + entry.absolute = normPath(path.resolve(this.cwd, entry.path)) return true } @@ -276,7 +287,7 @@ class Unpack extends Parser { } [MKDIR] (dir, mode, cb) { - mkdir(dir, { + mkdir(normPath(dir), { uid: this.uid, gid: this.gid, processUid: this.processUid, @@ -408,7 +419,8 @@ class Unpack extends Parser { } [HARDLINK] (entry, done) { - this[LINK](entry, path.resolve(this.cwd, entry.linkpath), 'link', done) + const linkpath = normPath(path.resolve(this.cwd, entry.linkpath)) + this[LINK](entry, linkpath, 'link', done) } [PEND] () { @@ -444,14 +456,8 @@ class Unpack extends Parser { // then that means we are about to delete the directory we created // previously, and it is no longer going to be a directory, and neither // is any of its children. - if (entry.type !== 'Directory') { - for (const path of this.dirCache.keys()) { - if (path === entry.absolute || - path.indexOf(entry.absolute + '/') === 0 || - path.indexOf(entry.absolute + '\\') === 0) - this.dirCache.delete(path) - } - } + if (entry.type !== 'Directory') + pruneCache(this.dirCache, entry.absolute) const paths = [entry.path] if (entry.linkpath) @@ -508,7 +514,7 @@ class Unpack extends Parser { } [LINK] (entry, linkpath, link, done) { - // XXX: get the type ('file' or 'dir') for windows + // XXX: get the type ('symlink' or 'junction') for windows fs[link](linkpath, entry.absolute, er => { if (er) return this[ONERROR](er, entry) @@ -525,14 +531,8 @@ class UnpackSync extends Unpack { } [CHECKFS] (entry) { - if (entry.type !== 'Directory') { - for (const path of this.dirCache.keys()) { - if (path === entry.absolute || - path.indexOf(entry.absolute + '/') === 0 || - path.indexOf(entry.absolute + '\\') === 0) - this.dirCache.delete(path) - } - } + if (entry.type !== 'Directory') + pruneCache(this.dirCache, entry.absolute) const er = this[MKDIR](path.dirname(entry.absolute), this.dmode, neverCalled) if (er) @@ -650,7 +650,7 @@ class UnpackSync extends Unpack { [MKDIR] (dir, mode) { try { - return mkdir.sync(dir, { + return mkdir.sync(normPath(dir), { uid: this.uid, gid: this.gid, processUid: this.processUid, diff --git a/lib/write-entry.js b/lib/write-entry.js index 490aaca0..b1df02a7 100644 --- a/lib/write-entry.js +++ b/lib/write-entry.js @@ -6,12 +6,14 @@ const Header = require('./header.js') const ReadEntry = require('./read-entry.js') const fs = require('fs') const path = require('path') +const normPath = require('./normalize-windows-path.js') +const stripSlash = require('./strip-trailing-slashes.js') const prefixPath = (path, prefix) => { if (!prefix) return path - path = path.replace(/^\.([/\\]|$)/, '') - return prefix + '/' + path + path = normPath(path).replace(/^\.(\/|$)/, '') + return stripSlash(prefix) + '/' + path } const maxReadSize = 16 * 1024 * 1024 @@ -45,7 +47,7 @@ const WriteEntry = warner(class WriteEntry extends MiniPass { super(opt) if (typeof p !== 'string') throw new TypeError('path is required') - this.path = p + this.path = normPath(p) // suppress atime, ctime, uid, gid, uname, gname this.portable = !!opt.portable // until node has builtin pwnam functions, this'll have to do @@ -88,7 +90,7 @@ const WriteEntry = warner(class WriteEntry extends MiniPass { p = p.replace(/\\/g, '/') } - this.absolute = opt.absolute || path.resolve(this.cwd, p) + this.absolute = normPath(opt.absolute || path.resolve(this.cwd, p)) if (this.path === '') this.path = './' diff --git a/package-lock.json b/package-lock.json index 921bfd70..41a63a61 100644 --- a/package-lock.json +++ b/package-lock.json @@ -355,6 +355,12 @@ } } }, + "caller": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/caller/-/caller-1.0.1.tgz", + "integrity": "sha1-uFGGD3Dhlds9J3OVqhp+I+ow7PU=", + "dev": true + }, "camelcase": { "version": "5.3.1", "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.3.1.tgz", @@ -1781,6 +1787,15 @@ "integrity": "sha1-jGStX9MNqxyXbiNE/+f3kqam30I=", "dev": true }, + "require-inject": { + "version": "1.4.4", + "resolved": "https://registry.npmjs.org/require-inject/-/require-inject-1.4.4.tgz", + "integrity": "sha512-5Y5ctRN84+I4iOZO61gm+48tgP/6Hcd3VZydkaEM3MCuOvnHRsTJYQBOc01faI/Z9at5nsCAJVHhlfPA6Pc0Og==", + "dev": true, + "requires": { + "caller": "^1.0.1" + } + }, "require-main-filename": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/require-main-filename/-/require-main-filename-2.0.0.tgz", diff --git a/package.json b/package.json index 7a677d70..d7cb9054 100644 --- a/package.json +++ b/package.json @@ -32,6 +32,7 @@ "end-of-stream": "^1.4.4", "events-to-array": "^1.1.2", "mutate-fs": "^2.1.1", + "require-inject": "^1.4.4", "rimraf": "^2.7.1", "tap": "^14.11.0", "tar-fs": "^1.16.3", diff --git a/test/normalize-windows-path.js b/test/normalize-windows-path.js new file mode 100644 index 00000000..e631c8ac --- /dev/null +++ b/test/normalize-windows-path.js @@ -0,0 +1,27 @@ +const t = require('tap') + +const realPlatform = process.platform +const fakePlatform = realPlatform === 'win32' ? 'posix' : 'win32' +const requireInject = require('require-inject') + +t.test('posix', t => { + if (realPlatform === 'win32') + process.env.TESTING_TAR_FAKE_PLATFORM = fakePlatform + else + delete process.env.TESTING_TAR_FAKE_PLATFORM + const normPath = requireInject('../lib/normalize-windows-path.js') + t.equal(normPath('/some/path/back\\slashes'), '/some/path/back\\slashes') + t.equal(normPath('c:\\foo\\bar'), 'c:\\foo\\bar') + t.end() +}) + +t.test('win32', t => { + if (realPlatform !== 'win32') + process.env.TESTING_TAR_FAKE_PLATFORM = fakePlatform + else + delete process.env.TESTING_TAR_FAKE_PLATFORM + const normPath = requireInject('../lib/normalize-windows-path.js') + t.equal(normPath('/some/path/back\\slashes'), '/some/path/back/slashes') + t.equal(normPath('c:\\foo\\bar'), 'c:/foo/bar') + t.end() +})