Skip to content

Commit

Permalink
fix(normalizePath): ensure non-ascii paths can be normalized
Browse files Browse the repository at this point in the history
Close #2161
  • Loading branch information
adamdbradley committed Jan 26, 2020
1 parent 252881f commit a97c2f5
Show file tree
Hide file tree
Showing 3 changed files with 240 additions and 68 deletions.
198 changes: 177 additions & 21 deletions src/utils/normalize-path.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,43 +5,199 @@
* extended-length paths and don't contain any non-ascii characters.
* This was created since the path methods in Node.js output \\ paths on Windows.
*/
export const normalizePath = (str: string): string => {
// https://github.com/sindresorhus/slash MIT
// By Sindre Sorhus
if (typeof str !== 'string') {
export const normalizePath = (path: string) => {
if (typeof path !== 'string') {
throw new Error(`invalid path to normalize`);
}
str = str.trim();
path = normalizeSlashes(path);
let normalized = getPathFromPathComponents(reducePathComponents(getPathComponents(path)));
if (normalized) {
normalized = normalized.trim();

if (EXTENDED_PATH_REGEX.test(str) || NON_ASCII_REGEX.test(str)) {
return str;
// always remove the trailing /
// this makes our file cache look ups consistent
if (normalized.charAt(normalized.length - 1) === '/') {
const colonIndex = normalized.indexOf(':');
if (colonIndex > -1) {
if (colonIndex < normalized.length - 2) {
normalized = normalized.substring(0, normalized.length - 1);
}
} else if (normalized.length > 1) {
normalized = normalized.substring(0, normalized.length - 1);
}
}
}
return normalized;
};

str = str.replace(SLASH_REGEX, '/');
/**
 * Rewrites every backslash in `path` as a forward slash.
 */
const normalizeSlashes = (path: string) => {
  return path.replace(backslashRegExp, '/');
};

// always remove the trailing /
// this makes our file cache look ups consistent
if (str.charAt(str.length - 1) === '/') {
const colonIndex = str.indexOf(':');
if (colonIndex > -1) {
if (colonIndex < str.length - 2) {
str = str.substring(0, str.length - 1);
// Backslash separator; used when scanning for the end of a "\\server" style root.
const altDirectorySeparator = "\\";
// Marker that ends a URL scheme, e.g. the "://" in "file://server/path".
const urlSchemeSeparator = "://";
// Matches every backslash in a string (global flag) so they can all be replaced at once.
const backslashRegExp = /\\/g;

/**
 * Collapses a list of path components by resolving "." and ".." entries.
 *
 * The first component is treated as the root and is always kept as-is.
 * For the remaining components:
 *  - empty components and "." are dropped
 *  - ".." removes the previously kept component, unless that component is
 *    itself ".." (in which case the ".." is kept)
 *  - ".." directly after a non-empty root is dropped (cannot go above root)
 *  - ".." directly after an empty root (a relative path) is kept
 *
 * @param components root followed by the path segments
 * @returns a new array of reduced components, or an empty array when the
 * input is not an array or has no entries
 */
const reducePathComponents = (components: readonly string[]) => {
  if (!Array.isArray(components) || components.length === 0) {
    return [];
  }

  const result = [components[0]];

  for (const segment of components.slice(1)) {
    // skip empty segments and "current directory" markers
    if (!segment || segment === '.') {
      continue;
    }

    if (segment === '..') {
      const hasSegmentsAfterRoot = result.length > 1;

      if (hasSegmentsAfterRoot && result[result.length - 1] !== '..') {
        // step back out of the previously kept segment
        result.pop();
        continue;
      }

      if (!hasSegmentsAfterRoot && result[0]) {
        // already at a real root, there is nothing above it
        continue;
      }
    }

    result.push(segment);
  }

  return result;
};

/**
 * Splits a path into its root followed by its individual segments.
 *
 * @param path the path to split
 * @param currentDirectory optional base the path is combined with first
 * @returns the root as the first entry, followed by the path segments
 */
const getPathComponents = (path: string, currentDirectory = '') => {
  const combined = combinePaths(currentDirectory, path);
  const rootLength = getRootLength(combined);
  return pathComponents(combined, rootLength);
};

/**
 * Returns the length of the root portion of `path`.
 *
 * A negative value from getEncodedRootLength() is a bitwise-complemented
 * length, so it is complemented again to get the plain length.
 */
const getRootLength = (path: string) => {
  const encoded = getEncodedRootLength(path);
  if (encoded < 0) {
    return ~encoded;
  }
  return encoded;
};

const getEncodedRootLength = (path: string): number => {
if (!path) return 0;
const ch0 = path.charCodeAt(0);

// POSIX or UNC
if (ch0 === CharacterCodes.slash || ch0 === CharacterCodes.backslash) {
if (path.charCodeAt(1) !== ch0) return 1; // POSIX: "/" (or non-normalized "\")

const p1 = path.indexOf(ch0 === CharacterCodes.slash ? '/' : altDirectorySeparator, 2);
if (p1 < 0) return path.length; // UNC: "//server" or "\\server"

return p1 + 1; // UNC: "//server/" or "\\server\"
}

// DOS
if (isVolumeCharacter(ch0) && path.charCodeAt(1) === CharacterCodes.colon) {
const ch2 = path.charCodeAt(2);
if (ch2 === CharacterCodes.slash || ch2 === CharacterCodes.backslash) return 3; // DOS: "c:/" or "c:\"
if (path.length === 2) return 2; // DOS: "c:" (but not "c:d")
}

} else if (str.length > 1) {
str = str.substring(0, str.length - 1);
// URL
const schemeEnd = path.indexOf(urlSchemeSeparator);
if (schemeEnd !== -1) {
const authorityStart = schemeEnd + urlSchemeSeparator.length;
const authorityEnd = path.indexOf('/', authorityStart);
if (authorityEnd !== -1) { // URL: "file:///", "file://server/", "file://server/path"
// For local "file" URLs, include the leading DOS volume (if present).
// Per https://www.ietf.org/rfc/rfc1738.txt, a host of "" or "localhost" is a
// special case interpreted as "the machine from which the URL is being interpreted".
const scheme = path.slice(0, schemeEnd);
const authority = path.slice(authorityStart, authorityEnd);
if (scheme === "file" && (authority === "" || authority === "localhost") &&
isVolumeCharacter(path.charCodeAt(authorityEnd + 1))) {
const volumeSeparatorEnd = getFileUrlVolumeSeparatorEnd(path, authorityEnd + 2);
if (volumeSeparatorEnd !== -1) {
if (path.charCodeAt(volumeSeparatorEnd) === CharacterCodes.slash) {
// URL: "file:///c:/", "file://localhost/c:/", "file:///c%3a/", "file://localhost/c%3a/"
return ~(volumeSeparatorEnd + 1);
}
if (volumeSeparatorEnd === path.length) {
// URL: "file:///c:", "file://localhost/c:", "file:///c%3a", "file://localhost/c%3a"
// but not "file:///c:d" or "file:///c%3ad"
return ~volumeSeparatorEnd;
}
}
}
return ~(authorityEnd + 1); // URL: "file://server/", "http://server/"
}
return ~path.length; // URL: "file://server", "http://server"
}

// relative
return 0;
}

/**
 * True when `charCode` is an ASCII letter (a-z or A-Z), i.e. a character
 * that can be used as a DOS volume letter.
 */
const isVolumeCharacter = (charCode: number) => {
  const isLowerCase = charCode >= CharacterCodes.a && charCode <= CharacterCodes.z;
  const isUpperCase = charCode >= CharacterCodes.A && charCode <= CharacterCodes.Z;
  return isLowerCase || isUpperCase;
};

/**
 * Finds where a volume separator ends inside a file URL.
 *
 * The separator is either a literal ":" or its percent-encoded form
 * ("%3a" / "%3A") located at `start`.
 *
 * @param url the URL being inspected
 * @param start index at which the separator is expected
 * @returns the index just past the separator, or -1 when none is found
 */
const getFileUrlVolumeSeparatorEnd = (url: string, start: number) => {
  const first = url.charCodeAt(start);
  if (first === CharacterCodes.colon) {
    return start + 1;
  }

  const startsWithPercent3 =
    first === CharacterCodes.percent && url.charCodeAt(start + 1) === CharacterCodes._3;
  if (!startsWithPercent3) {
    return -1;
  }

  const last = url.charCodeAt(start + 2);
  return last === CharacterCodes.a || last === CharacterCodes.A ? start + 3 : -1;
};

/**
 * Breaks `path` into its root (the first `rootLength` characters) and the
 * "/"-separated segments that follow it. A trailing empty segment, as
 * produced by a trailing "/", is dropped.
 *
 * @returns the root as the first entry, followed by the segments
 */
const pathComponents = (path: string, rootLength: number) => {
  const segments = path.substring(rootLength).split('/');
  if (segments.length && !lastOrUndefined(segments)) {
    segments.pop();
  }
  return [path.substring(0, rootLength), ...segments];
};

/**
 * Returns the final element of `array`, or `undefined` when it is empty.
 */
const lastOrUndefined = <T>(array: readonly T[]): T | undefined => {
  if (array.length === 0) {
    return undefined;
  }
  return array[array.length - 1];
};

/**
 * Joins `path` with each of the following `paths`, left to right.
 *
 * Backslashes are converted to forward slashes. Empty or undefined entries
 * are skipped. An entry that has a root of its own replaces everything
 * combined so far; any other entry is appended after a "/" separator.
 *
 * @param path the starting path
 * @param paths further paths to combine onto it
 * @returns the combined path
 */
const combinePaths = (path: string, ...paths: (string | undefined)[]) => {
  let combined = path ? normalizeSlashes(path) : path;

  for (const candidate of paths) {
    if (!candidate) {
      continue;
    }
    const next = normalizeSlashes(candidate);
    const startsOver = !combined || getRootLength(next) !== 0;
    combined = startsOver ? next : ensureTrailingDirectorySeparator(combined) + next;
  }

  return combined;
};

return str;
/**
 * Rebuilds a path string from its components: the root (first entry, given
 * a trailing separator when it is non-empty) followed by the remaining
 * entries joined with "/".
 *
 * @returns the assembled path, or an empty string when there are no components
 */
const getPathFromPathComponents = (pathComponents: readonly string[]) => {
  if (pathComponents.length === 0) {
    return '';
  }
  const first = pathComponents[0];
  const segments = pathComponents.slice(1);
  const root = first ? ensureTrailingDirectorySeparator(first) : first;
  return root + segments.join('/');
};

/**
 * Returns `path` with a "/" appended, unless it already ends with a
 * directory separator.
 */
const ensureTrailingDirectorySeparator = (path: string) =>
  hasTrailingDirectorySeparator(path) ? path : path + '/';

/**
 * True when `path` is non-empty and its last character is a directory
 * separator.
 */
const hasTrailingDirectorySeparator = (path: string) => {
  if (path.length === 0) {
    return false;
  }
  const lastCharCode = path.charCodeAt(path.length - 1);
  return isAnyDirectorySeparator(lastCharCode);
};

/**
 * True when `charCode` is the character code of "/" or "\".
 */
const isAnyDirectorySeparator = (charCode: number) => {
  switch (charCode) {
    case CharacterCodes.slash:
    case CharacterCodes.backslash:
      return true;
    default:
      return false;
  }
};

/**
 * Normalizes a path the same way normalizePath() does, after first
 * discarding everything from the first "?" onward (the querystring).
 * For example, /dir/file.css?tag=cmp-a becomes /dir/file.css
 */
export const normalizeFsPath = (p: string) => {
  const queryStart = p.indexOf('?');
  const withoutQuery = queryStart === -1 ? p : p.substring(0, queryStart);
  return normalizePath(withoutQuery);
};

const EXTENDED_PATH_REGEX = /^\\\\\?\\/;
const NON_ASCII_REGEX = /[^\x00-\x80]+/;
const SLASH_REGEX = /\\/g;
/**
 * Character codes of the characters the path helpers compare against.
 */
const enum CharacterCodes {
  // bounds of the upper case letters
  A = 0x41,
  Z = 0x5a,

  // bounds of the lower case letters
  a = 0x61,
  z = 0x7a,

  // digit three, as found in the percent-encoded colon "%3a"
  _3 = 0x33,

  // punctuation
  percent = 0x25, // %
  dot = 0x2e, // .
  slash = 0x2f, // /
  colon = 0x3a, // :
  backslash = 0x5c, // \
}
63 changes: 63 additions & 0 deletions src/utils/test/normalize-path.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import { normalizePath } from '../normalize-path';


// Specs for normalizePath(): separator conversion, non-ascii paths,
// trailing slash removal, whitespace trimming and invalid input.
describe('normalizePath', () => {

  it('normalize posix', () => {
    expect(normalizePath('/dir/basename.ext')).toBe('/dir/basename.ext');
    expect(normalizePath('/dir')).toBe('/dir');
  });

  it('normalize win32', () => {
    expect(normalizePath('C:\\dir\\basename.ext')).toBe('C:/dir/basename.ext');
    expect(normalizePath('C:\\dir')).toBe('C:/dir');
  });

  it('non-ascii', () => {
    expect(normalizePath('/中文/basename.ext')).toBe('/中文/basename.ext');
    expect(normalizePath('C:\\中文\\basename.ext')).toBe('C:/中文/basename.ext');
  });

  it('remove trailing slash, windows', () => {
    const path = normalizePath(`C:\\Johnny\\B\\Goode\\`);
    expect(path).toBe(`C:/Johnny/B/Goode`);
  });

  it('normalize file, windows', () => {
    const path = normalizePath(`C:\\Johnny\\B\\Goode.js`);
    expect(path).toBe(`C:/Johnny/B/Goode.js`);
  });

  it('not remove trailing slash for root dir, windows', () => {
    const path = normalizePath(`C:\\`);
    expect(path).toBe(`C:/`);
  });

  it('not remove trailing slash for root dir, unix', () => {
    const path = normalizePath(`/`);
    expect(path).toBe(`/`);
  });

  it('remove trailing slash, unix', () => {
    const path = normalizePath(`/Johnny/B/Goode/`);
    expect(path).toBe(`/Johnny/B/Goode`);
  });

  it('normalize file, unix', () => {
    const path = normalizePath(`/Johnny/B/Goode.js`);
    expect(path).toBe(`/Johnny/B/Goode.js`);
  });

  it('normalize file with spaces to trim', () => {
    const path = normalizePath(` /Johnny/B/Goode.js `);
    expect(path).toBe(`/Johnny/B/Goode.js`);
  });

  it('throw error when invalid string', () => {
    // Only normalizePath() itself may throw inside this callback. An extra
    // expect() in here would throw on mismatch and satisfy toThrow() even
    // if normalizePath() accepted the invalid input.
    expect(() => {
      normalizePath(null);
    }).toThrow();
  });

});
47 changes: 0 additions & 47 deletions src/utils/test/util.spec.ts
Original file line number Diff line number Diff line change
@@ -1,55 +1,8 @@
import * as util from '../util';
import { normalizePath } from '../normalize-path';


describe('util', () => {

describe('normalizePath', () => {

it('remove trailing slash, windows', () => {
const path = normalizePath(`C:\\Johnny\\B\\Goode\\`);
expect(path).toBe(`C:/Johnny/B/Goode`);
});

it('normalize file, windows', () => {
const path = normalizePath(`C:\\Johnny\\B\\Goode.js`);
expect(path).toBe(`C:/Johnny/B/Goode.js`);
});

it('not remove trailing slash for root dir, windows', () => {
const path = normalizePath(`C:\\`);
expect(path).toBe(`C:/`);
});

it('not remove trailing slash for root dir, unix', () => {
const path = normalizePath(`/`);
expect(path).toBe(`/`);
});

it('remove trailing slash, unix', () => {
const path = normalizePath(`/Johnny/B/Goode/`);
expect(path).toBe(`/Johnny/B/Goode`);
});

it('normalize file, unix', () => {
const path = normalizePath(`/Johnny/B/Goode.js`);
expect(path).toBe(`/Johnny/B/Goode.js`);
});

it('normalize file with spaces to trim', () => {
const path = normalizePath(` /Johnny/B/Goode.js `);
expect(path).toBe(`/Johnny/B/Goode.js`);
});

it('throw error when invalid string', () => {
expect(() => {
const path = normalizePath(null);
expect(path).toBe(`/Johnny/B/Goode.js`);
}).toThrow();
});

});

describe('isTsFile', () => {
it('should return true for regular .ts and .tsx files', () => {
expect(util.isTsFile('.ts')).toEqual(true);
Expand Down

0 comments on commit a97c2f5

Please sign in to comment.