diff --git a/lighthouse-core/index.js b/lighthouse-core/index.js
index 52c6b8392009..da7b8b183b48 100644
--- a/lighthouse-core/index.js
+++ b/lighthouse-core/index.js
@@ -10,6 +10,9 @@ const log = require('lighthouse-logger');
 const ChromeProtocol = require('./gather/connections/cri.js');
 const Config = require('./config/config');
 
+const URL = require('./lib/url-shim.js');
+const LHError = require('./lib/lh-error.js');
+
 /** @typedef {import('./gather/connections/connection.js')} Connection */
 
 /*
@@ -36,6 +39,11 @@ const Config = require('./config/config');
  * @return {Promise<LH.RunnerResult|undefined>}
  */
 async function lighthouse(url, flags = {}, configJSON, connection) {
+  // Reject unparseable URLs and protocols outside url-shim's allowed list before doing any work
+  if (url && (!URL.isValid(url) || !URL.isProtocolAllowed(url))) {
+    throw new LHError(LHError.errors.INVALID_URL);
+  }
+
   // set logging preferences, assume quiet
   flags.logLevel = flags.logLevel || 'error';
   log.setLevel(flags.logLevel);
diff --git a/lighthouse-core/lib/lh-error.js b/lighthouse-core/lib/lh-error.js
index 9409f0db73e6..1b623fcf0bd3 100644
--- a/lighthouse-core/lib/lh-error.js
+++ b/lighthouse-core/lib/lh-error.js
@@ -118,6 +118,12 @@ const ERRORS = {
     code: 'REQUEST_CONTENT_TIMEOUT',
     message: strings.requestContentTimeout,
   },
+
+  // URL parsing failures
+  INVALID_URL: {
+    code: 'INVALID_URL',
+    message: strings.urlInvalid,
+  },
 };
 
 /** @type {Record<keyof typeof ERRORS, LighthouseErrorDefinition>} */
diff --git a/lighthouse-core/lib/strings.js b/lighthouse-core/lib/strings.js
index 614bfda6081f..d6f56f5a1362 100644
--- a/lighthouse-core/lib/strings.js
+++ b/lighthouse-core/lib/strings.js
@@ -13,4 +13,5 @@ module.exports = {
   pageLoadFailed: `Your page failed to load. Verify that the URL is valid and re-run Lighthouse.`,
   internalChromeError: `An internal Chrome error occurred. Please restart Chrome and try re-running Lighthouse.`,
   requestContentTimeout: 'Fetching resource content has exceeded the allotted time',
+  urlInvalid: `The URL you have provided appears to be invalid.`,
 };
diff --git a/lighthouse-core/lib/url-shim.js b/lighthouse-core/lib/url-shim.js
index aaeb79ea7651..8327efff33c7 100644
--- a/lighthouse-core/lib/url-shim.js
+++ b/lighthouse-core/lib/url-shim.js
@@ -23,6 +23,11 @@ const listOfTlds = [
   'com', 'co', 'gov', 'edu', 'ac', 'org', 'go', 'gob', 'or', 'net', 'in', 'ne', 'nic', 'gouv',
   'web', 'spb', 'blog', 'jus', 'kiev', 'mil', 'wi', 'qc', 'ca', 'bel', 'on',
 ];
+
+const allowedProtocols = [
+  'https:', 'http:', 'chrome:',
+];
+
 /**
  * There is fancy URL rewriting logic for the chrome://settings page that we need to work around.
  * Why? Special handling was added by Chrome team to allow a pushState transition between chrome:// pages.
@@ -184,6 +189,20 @@ class URLShim extends URL {
       return false;
     }
   }
+
+  /**
+   * Determine if the url parses and uses a protocol in `allowedProtocols`; false if unparseable
+   * @param {string} url
+   * @return {boolean}
+   */
+  static isProtocolAllowed(url) {
+    try {
+      const parsed = new URL(url);
+      return allowedProtocols.includes(parsed.protocol);
+    } catch (e) {
+      return false;
+    }
+  }
 }
 
 URLShim.URL = URL;
diff --git a/lighthouse-core/test/index-test.js b/lighthouse-core/test/index-test.js
index faccc54712fb..99190946f006 100644
--- a/lighthouse-core/test/index-test.js
+++ b/lighthouse-core/test/index-test.js
@@ -52,7 +52,7 @@ describe('Module Tests', function() {
   });
 
   it('should throw an error when the second parameter is not an object', function() {
-    return lighthouse('SOME_URL', 'flags')
+    return lighthouse('chrome://version', 'flags')
       .then(() => {
         throw new Error('Should not have resolved when second arg is not an object');
       }, err => {
@@ -61,7 +61,7 @@ describe('Module Tests', function() {
   });
 
   it('should throw an error when the config is invalid', function() {
-    return lighthouse('SOME_URL', {}, {})
+    return lighthouse('chrome://version', {}, {})
       .then(() => {
         throw new Error('Should not have resolved when second arg is not an object');
       }, err => {
@@ -70,7 +70,7 @@ describe('Module Tests', function() {
   });
 
   it('should throw an error when the config contains incorrect audits', function() {
-    return lighthouse('SOME_URL', {}, {
+    return lighthouse('chrome://version', {}, {
       passes: [{
         gatherers: [
           'viewport',
@@ -87,6 +87,25 @@ describe('Module Tests', function() {
       });
   });
 
+  it('should throw an error when the url is invalid', function() {
+    // Schemeless input cannot be parsed; 'https:/host' would be normalized and accepted.
+    return lighthouse('i-am-not-valid', {}, {})
+      .then(() => {
+        throw new Error('Should not have resolved when url is invalid');
+      }, err => {
+        assert.strictEqual(err.code, 'INVALID_URL');
+      });
+  });
+
+  it('should throw an error when the url is invalid protocol (file:///)', function() {
+    return lighthouse('file:///a/fake/index.html', {}, {})
+      .then(() => {
+        throw new Error('Should not have resolved when url is file:///');
+      }, err => {
+        assert.strictEqual(err.code, 'INVALID_URL');
+      });
+  });
+
   it('should return formatted LHR when given no categories', function() {
     const exampleUrl = 'https://www.reddit.com/r/nba';
     return lighthouse(exampleUrl, {
diff --git a/lighthouse-core/test/lib/url-shim-test.js b/lighthouse-core/test/lib/url-shim-test.js
index 9ba2f0a1de73..236127de7fd2 100644
--- a/lighthouse-core/test/lib/url-shim-test.js
+++ b/lighthouse-core/test/lib/url-shim-test.js
@@ -36,6 +36,18 @@ describe('URL Shim', () => {
     assert.equal(URL.isValid('eval(<context>):45:16'), false);
   });
 
+  it('safely identifies allowed URL protocols', () => {
+    assert.ok(URL.isProtocolAllowed('http://google.com/'));
+    assert.ok(URL.isProtocolAllowed('https://google.com/'));
+    assert.ok(URL.isProtocolAllowed('chrome://version'));
+  });
+
+  it('safely identifies disallowed URL protocols', () => {
+    assert.equal(URL.isProtocolAllowed('file:///i/am/a/fake/file.html'), false);
+    assert.equal(URL.isProtocolAllowed('ftp://user:password@private.ftp.example.com/index.html'), false);
+    assert.equal(URL.isProtocolAllowed('gopher://underground:9090/path'), false);
+  });
+
   it('safely identifies same hosts', () => {
     const urlA = 'https://5321212.fls.net/page?query=string#hash';
     const urlB = 'http://5321212.fls.net/deeply/nested/page';