From 5f894587b80060617b19fedd2784a3d76f1822b3 Mon Sep 17 00:00:00 2001 From: Xiaozhen Liu Date: Fri, 28 Feb 2020 16:59:16 -0800 Subject: [PATCH] fix(perf): links are queued just once (#154) --- src/index.ts | 14 ++++++-------- test/test.ts | 15 ++++++++++++++- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/index.ts b/src/index.ts index a3524721..02f05fc6 100644 --- a/src/index.ts +++ b/src/index.ts @@ -71,13 +71,16 @@ export class LinkChecker extends EventEmitter { }); const results = new Array(); + const url = new URL(options.path); + const initCache: Set = new Set(); + initCache.add(url.href); queue.add(async () => { await this.crawl({ url: new URL(options.path), crawl: true, checkOptions: options, results, - cache: new Set(), + cache: initCache, queue, }); }); @@ -116,13 +119,7 @@ export class LinkChecker extends EventEmitter { * @private * @returns A list of crawl results consisting of urls and status codes */ - private async crawl(opts: CrawlOptions): Promise { - // Check to see if we've already scanned this url - if (opts.cache.has(opts.url.href)) { - return; - } - opts.cache.add(opts.url.href); - + async crawl(opts: CrawlOptions): Promise { // explicitly skip non-http[s] links before making the request const proto = opts.url.protocol; if (proto !== 'http:' && proto !== 'https:') { @@ -248,6 +245,7 @@ export class LinkChecker extends EventEmitter { // Ensure the url hasn't already been touched, largely to avoid a // very large queue length and runaway memory consumption if (!opts.cache.has(result.url.href)) { + opts.cache.add(result.url.href); opts.queue.add(async () => { await this.crawl({ url: result.url!, diff --git a/test/test.ts b/test/test.ts index 57154918..bbb60556 100644 --- a/test/test.ts +++ b/test/test.ts @@ -4,7 +4,7 @@ import * as nock from 'nock'; import * as sinon from 'sinon'; import * as path from 'path'; -import { check, LinkState } from '../src'; +import { check, LinkState, LinkChecker } from '../src'; nock.disableNetConnect(); nock.enableNetConnect('localhost'); @@ -33,6 +33,19 @@ describe('linkinator', () => { scope.done(); }); + it('should only queue a link once', async () => { + const scope = nock('http://fake.local') + .head('/') + .reply(200); + const checker = new LinkChecker(); + const checkerSpy = sinon.spy(checker, 'crawl'); + const results = await checker.check({ path: 'test/fixtures/twice' }); + assert.ok(results.passed); + assert.strictEqual(results.links.length, 2); + assert.strictEqual(checkerSpy.callCount, 2); + scope.done(); + }); + it('should skip links if asked nicely', async () => { const results = await check({ path: 'test/fixtures/skip',