From 96602b5a7fee497dd7d3456fb0de993eedd27779 Mon Sep 17 00:00:00 2001 From: Greg Price Date: Wed, 16 Dec 2020 15:30:49 -0800 Subject: [PATCH] url: Add and test predicates isUrlAbsolute, isUrlPathAbsolute, etc. These will let us do certain URL parsing and reassembly exactly, relying only on assumptions we can make explicit. In particular, we'll use it to interpret avatar URLs that might be either absolute, or path-absolute and meant to be relative to the realm URL. --- src/utils/__tests__/url-test.js | 59 +++++++++++++++++++++++++++++++++ src/utils/url.js | 53 +++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+) diff --git a/src/utils/__tests__/url-test.js b/src/utils/__tests__/url-test.js index b27c5c197f0..c4091cf7398 100644 --- a/src/utils/__tests__/url-test.js +++ b/src/utils/__tests__/url-test.js @@ -7,10 +7,69 @@ import { fixRealmUrl, autocompleteRealmPieces, autocompleteRealm, + isUrlAbsolute, + isUrlRelative, + isUrlPathAbsolute, } from '../url'; import type { Auth } from '../../types'; import type { AutocompletionDefaults } from '../url'; +const urlClassifierCases = { + // These data are mostly a selection from this resource: + // https://github.com/web-platform-tests/wpt/blob/master/url/resources/urltestdata.json + // which is referred to at the top of the URL Standard. Each key names the class of its URLs.
+ absolute: ['https://example.com/foo', 'a1234567890-+.:foo/bar', 'AB://c/d'], + pathAbsolute: ['/', '/foo/bar', '/.//path', '/../localhost/', '/:23', '/a/ /c'], + otherRelative: [ + '//example.com/foo', + '//foo/bar', + '//', + '///', + '///test', + '//www.example2.com', + '10.0.0.7:8080/foo.html', + 'a!@$*=/foo.html', + '#β', + ], +}; + +const urlClassifierData = Object.keys(urlClassifierCases).flatMap(key => + urlClassifierCases[key].map(url => ({ + url, + absolute: key === 'absolute', + relative: key !== 'absolute', + pathAbsolute: key === 'pathAbsolute', + })), +); + +/* eslint-disable no-underscore-dangle */ +describe('isUrlAbsolute', () => { + for (const case_ of urlClassifierData) { + const { url, absolute: expected } = case_; + test(`${expected ? 'accept' : 'reject'} ${url}`, () => { + expect(isUrlAbsolute(url)).toEqual(expected); + }); + } +}); + +describe('isUrlRelative', () => { + for (const case_ of urlClassifierData) { + const { url, relative: expected } = case_; + test(`${expected ? 'accept' : 'reject'} ${url}`, () => { + expect(isUrlRelative(url)).toEqual(expected); + }); + } +}); + +describe('isUrlPathAbsolute', () => { + for (const case_ of urlClassifierData) { + const { url, pathAbsolute: expected } = case_; + test(`${expected ? 'accept' : 'reject'} ${url}`, () => { + expect(isUrlPathAbsolute(url)).toEqual(expected); + }); + } +}); + describe('getResource', () => { test('when uri contains domain, do not change, add auth headers', () => { const auth: Auth = { diff --git a/src/utils/url.js b/src/utils/url.js index 179a4ce1cc0..9ef2f2f1606 100644 --- a/src/utils/url.js +++ b/src/utils/url.js @@ -29,6 +29,59 @@ export const encodeParamsForUrl = (params: UrlParams): string => .map(([key, value]) => `${encodeURIComponent(key)}=${encodeURIComponent(value.toString())}`) .join('&'); +/** + * Test for an absolute URL string, assuming a valid URL.
+ * + * Specifically, we assume the input is a "valid URL string" as defined by + * the URL Standard: + * https://url.spec.whatwg.org/#url-writing + * and return true just if it's an "absolute-URL-with-fragment string". + * + * If the input is not a valid URL string, the result is unspecified. + */ +export const isUrlAbsolute = (url: string): boolean => + // True just if the string starts with a "URL-scheme string", then `:`. + // Every "absolute-URL string" must do so. + // Every "relative-URL string" must not do so: either it starts with a + // "path-relative-scheme-less-URL string", or it starts with `/`. + url.match(/^[a-zA-Z][a-zA-Z0-9+.-]*:/) !== null; + +/** + * Test for a relative URL string, assuming a valid URL. + * + * Specifically, we assume the input is a "valid URL string" as defined by + * the URL Standard: + * https://url.spec.whatwg.org/#url-writing + * and return true just if it's a "relative-URL-with-fragment string". + * + * If the input is not a valid URL string, the result is unspecified. + */ +export const isUrlRelative = (url: string): boolean => !isUrlAbsolute(url); + +/** + * Test for a path-absolute URL string, assuming a valid URL. + * + * Specifically, we assume the input is a "valid URL string" as defined by + * the URL Standard: + * https://url.spec.whatwg.org/#url-writing + * and return true just if it's a "path-absolute-URL string". + * + * This is the kind like "/foo/bar" that keeps the part of the base URL + * before the path, and replaces the rest. + * + * Note that this is a kind of relative-URL string, so when this returns + * true (for a valid URL), `isUrlRelative` will always also return true and + * `isUrlAbsolute` will return false. + */ +export const isUrlPathAbsolute = (url: string): boolean => + // A "path-absolute-URL string" must start with `/` and not `//`.
+ // On the other hand: + // * a "path-relative-scheme-less-URL string" must not start with `/`; + // * the other forms of "relative-URL string" all must start with `//`. + !!url.match(/^\/($|[^\/])/); // eslint-disable-line no-useless-escape +// ESLint says one of these slashes could be written unescaped. +// But that seems like a recipe for confusion, so we escape them both. + /** Just like `new URL`, but on error return undefined instead of throwing. */ export const tryParseUrl = (url: string, base?: string | URL): URL | void => { try {