diff --git a/src/utils/normalize-path.ts b/src/utils/normalize-path.ts
index a02b4bdc9f2..1accc62d903 100644
--- a/src/utils/normalize-path.ts
+++ b/src/utils/normalize-path.ts
@@ -5,42 +5,234 @@
  * extended-length paths and don't contain any non-ascii characters.
  * This was created since the path methods in Node.js outputs \\ paths on Windows.
+ *
+ * The input is trimmed, backslashes become forward slashes, and empty, "." and ".."
+ * segments are collapsed. A trailing "/" is removed, except for roots such as "/" and "C:/".
+ *
+ * @param path the path to normalize
+ * @returns the normalized, forward-slash path
+ * @throws Error if path is not a string
  */
-export const normalizePath = (str: string): string => {
-  // https://github.com/sindresorhus/slash MIT
-  // By Sindre Sorhus
-  if (typeof str !== 'string') {
+export const normalizePath = (path: string): string => {
+  if (typeof path !== 'string') {
     throw new Error(`invalid path to normalize`);
   }
-  str = str.trim();
 
+  // trim before parsing so surrounding whitespace cannot hide the root
+  // or turn a trailing ".." into an ordinary segment
+  path = normalizeSlashes(path.trim());
+  let normalized = getPathFromPathComponents(reducePathComponents(getPathComponents(path)));
+
-  if (EXTENDED_PATH_REGEX.test(str) || NON_ASCII_REGEX.test(str)) {
-    return str;
+  // always remove the trailing /
+  // this makes our file cache look ups consistent
+  if (normalized.charAt(normalized.length - 1) === '/') {
+    const colonIndex = normalized.indexOf(':');
+    if (colonIndex > -1) {
+      if (colonIndex < normalized.length - 2) {
+        normalized = normalized.substring(0, normalized.length - 1);
+      }
+    } else if (normalized.length > 1) {
+      normalized = normalized.substring(0, normalized.length - 1);
+    }
   }
 
+  return normalized;
+};
+
-  str = str.replace(SLASH_REGEX, '/');
+/** Replaces every backslash in the path with a forward slash. */
+const normalizeSlashes = (path: string) => path.replace(backslashRegExp, '/');
 
-  // always remove the trailing /
-  // this makes our file cache look ups consistent
-  if (str.charAt(str.length - 1) === '/') {
-    const colonIndex = str.indexOf(':');
-    if (colonIndex > -1) {
-      if (colonIndex < str.length - 2) {
-        str = str.substring(0, str.length - 1);
+const altDirectorySeparator = '\\';
+const urlSchemeSeparator = '://';
+const backslashRegExp = /\\/g;
+
+/**
+ * Collapses a component list whose first entry is the root: empty and "."
+ * entries are dropped, and ".." removes the preceding segment when there is one.
+ */
+const reducePathComponents = (components: readonly string[]) => {
+  if (!Array.isArray(components) || components.length === 0) {
+    return [];
+  }
+  const reduced = [components[0]];
+  for (let i = 1; i < components.length; i++) {
+    const component = components[i];
+    if (!component) continue;
+    if (component === '.') continue;
+    if (component === '..') {
+      if (reduced.length > 1) {
+        if (reduced[reduced.length - 1] !== '..') {
+          reduced.pop();
+          continue;
+        }
       }
+      else if (reduced[0]) continue; // cannot go above a non-empty root
+    }
+    reduced.push(component);
+  }
+  return reduced;
+};
+
+/** Joins `path` onto `currentDirectory` (when given) and splits the result into its root followed by its segments. */
+const getPathComponents = (path: string, currentDirectory = '') => {
+  path = combinePaths(currentDirectory, path);
+  return pathComponents(path, getRootLength(path));
+};
+
+/** Returns the length of the root portion of the path, or 0 for a relative path. */
+const getRootLength = (path: string) => {
+  const rootLength = getEncodedRootLength(path);
+  return rootLength < 0 ? ~rootLength : rootLength;
+};
+
+/**
+ * Returns the root length of the path. URL roots are returned bitwise-inverted
+ * (negative) so callers can tell them apart from file system roots.
+ */
+const getEncodedRootLength = (path: string): number => {
+  if (!path) return 0;
+  const ch0 = path.charCodeAt(0);
+
+  // POSIX or UNC
+  if (ch0 === CharacterCodes.slash || ch0 === CharacterCodes.backslash) {
+    if (path.charCodeAt(1) !== ch0) return 1; // POSIX: "/" (or non-normalized "\")
+
+    const p1 = path.indexOf(ch0 === CharacterCodes.slash ? '/' : altDirectorySeparator, 2);
+    if (p1 < 0) return path.length; // UNC: "//server" or "\\server"
+
+    return p1 + 1; // UNC: "//server/" or "\\server\"
+  }
+
+  // DOS
+  if (isVolumeCharacter(ch0) && path.charCodeAt(1) === CharacterCodes.colon) {
+    const ch2 = path.charCodeAt(2);
+    if (ch2 === CharacterCodes.slash || ch2 === CharacterCodes.backslash) return 3; // DOS: "c:/" or "c:\"
+    if (path.length === 2) return 2; // DOS: "c:" (but not "c:d")
+  }
-    } else if (str.length > 1) {
-      str = str.substring(0, str.length - 1);
+
+  // URL
+  const schemeEnd = path.indexOf(urlSchemeSeparator);
+  if (schemeEnd !== -1) {
+    const authorityStart = schemeEnd + urlSchemeSeparator.length;
+    const authorityEnd = path.indexOf('/', authorityStart);
+    if (authorityEnd !== -1) { // URL: "file:///", "file://server/", "file://server/path"
+      // For local "file" URLs, include the leading DOS volume (if present).
+      // Per https://www.ietf.org/rfc/rfc1738.txt, a host of "" or "localhost" is a
+      // special case interpreted as "the machine from which the URL is being interpreted".
+      const scheme = path.slice(0, schemeEnd);
+      const authority = path.slice(authorityStart, authorityEnd);
+      if (scheme === 'file' && (authority === '' || authority === 'localhost') &&
+        isVolumeCharacter(path.charCodeAt(authorityEnd + 1))) {
+        const volumeSeparatorEnd = getFileUrlVolumeSeparatorEnd(path, authorityEnd + 2);
+        if (volumeSeparatorEnd !== -1) {
+          if (path.charCodeAt(volumeSeparatorEnd) === CharacterCodes.slash) {
+            // URL: "file:///c:/", "file://localhost/c:/", "file:///c%3a/", "file://localhost/c%3a/"
+            return ~(volumeSeparatorEnd + 1);
+          }
+          if (volumeSeparatorEnd === path.length) {
+            // URL: "file:///c:", "file://localhost/c:", "file:///c%3a", "file://localhost/c%3a"
+            // but not "file:///c:d" or "file:///c%3ad"
+            return ~volumeSeparatorEnd;
+          }
+        }
+      }
+      return ~(authorityEnd + 1); // URL: "file://server/", "http://server/"
+    }
+    return ~path.length; // URL: "file://server", "http://server"
+  }
+
+  // relative
+  return 0;
+};
+
+/** Reports whether the character code is an ASCII letter (a-z or A-Z). */
+const isVolumeCharacter = (charCode: number) =>
+  (charCode >= CharacterCodes.a && charCode <= CharacterCodes.z) ||
+  (charCode >= CharacterCodes.A && charCode <= CharacterCodes.Z);
+
+/**
+ * Returns the index just past a ":" or "%3a"/"%3A" volume separator that begins
+ * at `start`, or -1 when neither is found there.
+ */
+const getFileUrlVolumeSeparatorEnd = (url: string, start: number) => {
+  const ch0 = url.charCodeAt(start);
+  if (ch0 === CharacterCodes.colon) return start + 1;
+  if (ch0 === CharacterCodes.percent && url.charCodeAt(start + 1) === CharacterCodes._3) {
+    const ch2 = url.charCodeAt(start + 2);
+    if (ch2 === CharacterCodes.a || ch2 === CharacterCodes.A) return start + 3;
+  }
+  return -1;
+};
+
+/** Splits the path into its root (the first `rootLength` characters) followed by the "/"-separated rest. */
+const pathComponents = (path: string, rootLength: number) => {
+  const root = path.substring(0, rootLength);
+  const rest = path.substring(rootLength).split('/');
+  if (rest.length && !lastOrUndefined(rest)) rest.pop(); // drop the empty entry left by a trailing "/"
+  return [root, ...rest];
+};
+
+/** Returns the last element of the array, or undefined when it is empty. */
+const lastOrUndefined = <T>(array: readonly T[]): T | undefined =>
+  array.length === 0 ? undefined : array[array.length - 1];
+
+/**
+ * Joins the given paths with "/". A later path that has a root of its own
+ * replaces everything before it; empty or undefined entries are skipped.
+ */
+const combinePaths = (path: string, ...paths: (string | undefined)[]) => {
+  if (path) path = normalizeSlashes(path);
+  for (let relativePath of paths) {
+    if (!relativePath) continue;
+    relativePath = normalizeSlashes(relativePath);
+    if (!path || getRootLength(relativePath) !== 0) {
+      path = relativePath;
+    } else {
+      path = ensureTrailingDirectorySeparator(path) + relativePath;
     }
   }
+  return path;
+};
 
-  return str;
+/** Rebuilds a path string from a component list whose first entry is the root. */
+const getPathFromPathComponents = (pathComponents: readonly string[]) => {
+  if (pathComponents.length === 0) return '';
+  const root = pathComponents[0] && ensureTrailingDirectorySeparator(pathComponents[0]);
+  return root + pathComponents.slice(1).join('/');
 };
 
+/** Appends "/" to the path unless it already ends with a directory separator. */
+const ensureTrailingDirectorySeparator = (path: string) => {
+  if (!hasTrailingDirectorySeparator(path)) {
+    return path + '/';
+  }
+  return path;
+};
+
+/** Reports whether the path ends with "/" or "\". */
+const hasTrailingDirectorySeparator = (path: string) =>
+  path.length > 0 && isAnyDirectorySeparator(path.charCodeAt(path.length - 1));
+
+/** Reports whether the character code is "/" or "\". */
+const isAnyDirectorySeparator = (charCode: number) =>
+  charCode === CharacterCodes.slash || charCode === CharacterCodes.backslash;
+
 /**
  * Same as normalizePath(), expect it'll also strip any querystrings
  * from the path name. So /dir/file.css?tag=cmp-a becomes /dir/file.css
  */
 export const normalizeFsPath = (p: string) => normalizePath(p.split('?')[0]);
 
-const EXTENDED_PATH_REGEX = /^\\\\\?\\/;
-const NON_ASCII_REGEX = /[^\x00-\x80]+/;
-const SLASH_REGEX = /\\/g;
+/** Character codes used while scanning paths. */
+const enum CharacterCodes {
+  a = 0x61,
+  A = 0x41,
+  z = 0x7A,
+  Z = 0x5a,
+  _3 = 0x33,
+
+  backslash = 0x5C, // \
+  colon = 0x3A, // :
+  dot = 0x2E, // .
+  percent = 0x25, // %
+  slash = 0x2F, // /
+}
diff --git a/src/utils/test/normalize-path.spec.ts b/src/utils/test/normalize-path.spec.ts
new file mode 100644
index 00000000000..936c27c57ab
--- /dev/null
+++ b/src/utils/test/normalize-path.spec.ts
@@ -0,0 +1,69 @@
+import { normalizePath } from '../normalize-path';
+
+
+describe('normalizePath', () => {
+
+  it('normalize posix', () => {
+    expect(normalizePath('/dir/basename.ext')).toBe('/dir/basename.ext');
+    expect(normalizePath('/dir')).toBe('/dir');
+  });
+
+  it('normalize win32', () => {
+    expect(normalizePath('C:\\dir\\basename.ext')).toBe('C:/dir/basename.ext');
+    expect(normalizePath('C:\\dir')).toBe('C:/dir');
+  });
+
+  it('non-ascii', () => {
+    expect(normalizePath('/中文/basename.ext')).toBe('/中文/basename.ext');
+    expect(normalizePath('C:\\中文\\basename.ext')).toBe('C:/中文/basename.ext');
+  });
+
+  it('remove trailing slash, windows', () => {
+    const path = normalizePath(`C:\\Johnny\\B\\Goode\\`);
+    expect(path).toBe(`C:/Johnny/B/Goode`);
+  });
+
+  it('normalize file, windows', () => {
+    const path = normalizePath(`C:\\Johnny\\B\\Goode.js`);
+    expect(path).toBe(`C:/Johnny/B/Goode.js`);
+  });
+
+  it('not remove trailing slash for root dir, windows', () => {
+    const path = normalizePath(`C:\\`);
+    expect(path).toBe(`C:/`);
+  });
+
+  it('not remove trailing slash for root dir, unix', () => {
+    const path = normalizePath(`/`);
+    expect(path).toBe(`/`);
+  });
+
+  it('remove trailing slash, unix', () => {
+    const path = normalizePath(`/Johnny/B/Goode/`);
+    expect(path).toBe(`/Johnny/B/Goode`);
+  });
+
+  it('normalize file, unix', () => {
+    const path = normalizePath(`/Johnny/B/Goode.js`);
+    expect(path).toBe(`/Johnny/B/Goode.js`);
+  });
+
+  it('normalize file with spaces to trim', () => {
+    const path = normalizePath(` /Johnny/B/Goode.js `);
+    expect(path).toBe(`/Johnny/B/Goode.js`);
+  });
+
+  it('resolve . and .. segments', () => {
+    expect(normalizePath('/dir/./sub/../basename.ext')).toBe('/dir/basename.ext');
+    expect(normalizePath('C:\\dir\\..\\basename.ext')).toBe('C:/basename.ext');
+  });
+
+  it('trim before resolving segments', () => {
+    expect(normalizePath(' /dir/.. ')).toBe('/');
+  });
+
+  it('throw error when invalid string', () => {
+    expect(() => normalizePath(null)).toThrow();
+  });
+
+});
diff --git a/src/utils/test/util.spec.ts b/src/utils/test/util.spec.ts
index 7223da31dc8..937e6ac1408 100644
--- a/src/utils/test/util.spec.ts
+++ b/src/utils/test/util.spec.ts
@@ -1,55 +1,7 @@
 import * as util from '../util';
-import { normalizePath } from '../normalize-path';
 
 describe('util', () => {
 
-  describe('normalizePath', () => {
-
-    it('remove trailing slash, windows', () => {
-      const path = normalizePath(`C:\\Johnny\\B\\Goode\\`);
-      expect(path).toBe(`C:/Johnny/B/Goode`);
-    });
-
-    it('normalize file, windows', () => {
-      const path = normalizePath(`C:\\Johnny\\B\\Goode.js`);
-      expect(path).toBe(`C:/Johnny/B/Goode.js`);
-    });
-
-    it('not remove trailing slash for root dir, windows', () => {
-      const path = normalizePath(`C:\\`);
-      expect(path).toBe(`C:/`);
-    });
-
-    it('not remove trailing slash for root dir, unix', () => {
-      const path = normalizePath(`/`);
-      expect(path).toBe(`/`);
-    });
-
-    it('remove trailing slash, unix', () => {
-      const path = normalizePath(`/Johnny/B/Goode/`);
-      expect(path).toBe(`/Johnny/B/Goode`);
-    });
-
-    it('normalize file, unix', () => {
-      const path = normalizePath(`/Johnny/B/Goode.js`);
-      expect(path).toBe(`/Johnny/B/Goode.js`);
-    });
-
-    it('normalize file with spaces to trim', () => {
-      const path = normalizePath(` /Johnny/B/Goode.js `);
-      expect(path).toBe(`/Johnny/B/Goode.js`);
-    });
-
-    it('throw error when invalid string', () => {
-      expect(() => {
-        const path = normalizePath(null);
-        expect(path).toBe(`/Johnny/B/Goode.js`);
-      }).toThrow();
-    });
-
-  });
-
-
   describe('isTsFile', () => {
     it('should return true for regular .ts and .tsx files', () => {
       expect(util.isTsFile('.ts')).toEqual(true);