Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: links are queued just once #154

Merged
merged 3 commits into the base branch from the source branch on
Feb 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 6 additions & 8 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,16 @@ export class LinkChecker extends EventEmitter {
});

const results = new Array<LinkResult>();
const url = new URL(options.path);
const initCache: Set<string> = new Set();
initCache.add(url.href);
queue.add(async () => {
await this.crawl({
url: new URL(options.path),
crawl: true,
checkOptions: options,
results,
cache: new Set(),
cache: initCache,
queue,
});
});
Expand Down Expand Up @@ -116,13 +119,7 @@ export class LinkChecker extends EventEmitter {
* @private
* @returns A list of crawl results consisting of urls and status codes
*/
private async crawl(opts: CrawlOptions): Promise<void> {
// Check to see if we've already scanned this url
if (opts.cache.has(opts.url.href)) {
return;
}
opts.cache.add(opts.url.href);

async crawl(opts: CrawlOptions): Promise<void> {
// explicitly skip non-http[s] links before making the request
const proto = opts.url.protocol;
if (proto !== 'http:' && proto !== 'https:') {
Expand Down Expand Up @@ -248,6 +245,7 @@ export class LinkChecker extends EventEmitter {
// Ensure the url hasn't already been touched, largely to avoid a
// very large queue length and runaway memory consumption
if (!opts.cache.has(result.url.href)) {
opts.cache.add(result.url.href);
opts.queue.add(async () => {
await this.crawl({
url: result.url!,
Expand Down
15 changes: 14 additions & 1 deletion test/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import * as nock from 'nock';
import * as sinon from 'sinon';
import * as path from 'path';

import { check, LinkState } from '../src';
import { check, LinkState, LinkChecker } from '../src';

nock.disableNetConnect();
nock.enableNetConnect('localhost');
Expand Down Expand Up @@ -33,6 +33,19 @@ describe('linkinator', () => {
scope.done();
});

it('should only queue a link once', async () => {
const scope = nock('http://fake.local')
.head('/')
.reply(200);
const checker = new LinkChecker();
const checkerSpy = sinon.spy(checker, 'crawl');
const results = await checker.check({ path: 'test/fixtures/twice' });
assert.ok(results.passed);
assert.strictEqual(results.links.length, 2);
assert.strictEqual(checkerSpy.callCount, 2);
scope.done();
});

it('should skip links if asked nicely', async () => {
const results = await check({
path: 'test/fixtures/skip',
Expand Down