Skip to content

Commit

Permalink
fix(perf): links are queued just once (#154)
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaozhenliu-gg5 committed Feb 29, 2020
1 parent aa139da commit 5f89458
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 9 deletions.
14 changes: 6 additions & 8 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,16 @@ export class LinkChecker extends EventEmitter {
});

const results = new Array<LinkResult>();
const url = new URL(options.path);
const initCache: Set<string> = new Set();
initCache.add(url.href);
queue.add(async () => {
await this.crawl({
url: new URL(options.path),
crawl: true,
checkOptions: options,
results,
cache: new Set(),
cache: initCache,
queue,
});
});
Expand Down Expand Up @@ -116,13 +119,7 @@ export class LinkChecker extends EventEmitter {
* @private
* @returns A list of crawl results consisting of urls and status codes
*/
private async crawl(opts: CrawlOptions): Promise<void> {
// Check to see if we've already scanned this url
if (opts.cache.has(opts.url.href)) {
return;
}
opts.cache.add(opts.url.href);

async crawl(opts: CrawlOptions): Promise<void> {
// explicitly skip non-http[s] links before making the request
const proto = opts.url.protocol;
if (proto !== 'http:' && proto !== 'https:') {
Expand Down Expand Up @@ -248,6 +245,7 @@ export class LinkChecker extends EventEmitter {
// Ensure the url hasn't already been touched, largely to avoid a
// very large queue length and runaway memory consumption
if (!opts.cache.has(result.url.href)) {
opts.cache.add(result.url.href);
opts.queue.add(async () => {
await this.crawl({
url: result.url!,
Expand Down
15 changes: 14 additions & 1 deletion test/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import * as nock from 'nock';
import * as sinon from 'sinon';
import * as path from 'path';

import { check, LinkState } from '../src';
import { check, LinkState, LinkChecker } from '../src';

nock.disableNetConnect();
nock.enableNetConnect('localhost');
Expand Down Expand Up @@ -33,6 +33,19 @@ describe('linkinator', () => {
scope.done();
});

// Regression test: checks that `crawl` is invoked once per reported link
// rather than once per occurrence of a link.
it('should only queue a link once', async () => {
// Mock a HEAD request to http://fake.local/ that replies with 200.
const scope = nock('http://fake.local')
.head('/')
.reply(200);
// Use a LinkChecker instance (instead of the `check` helper) so that its
// `crawl` method can be wrapped with a sinon spy and its calls counted.
const checker = new LinkChecker();
const checkerSpy = sinon.spy(checker, 'crawl');
// NOTE(review): presumably the `twice` fixture references the same
// http://fake.local link more than once — confirm against the fixture.
const results = await checker.check({ path: 'test/fixtures/twice' });
assert.ok(results.passed);
// Two links are reported in total, and `crawl` ran exactly twice,
// i.e. once per reported link.
assert.strictEqual(results.links.length, 2);
assert.strictEqual(checkerSpy.callCount, 2);
// Verify that the mocked HEAD request was actually made.
scope.done();
});

it('should skip links if asked nicely', async () => {
const results = await check({
path: 'test/fixtures/skip',
Expand Down

0 comments on commit 5f89458

Please sign in to comment.