Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add missing --include flag & linksToInclude option #105

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Behold my latest inator! The `linkinator` provides an API and CLI for crawling w
- 🔥Easily perform scans on remote sites or local files
- 🔥Scan any element that includes links, not just `<a href>`
- 🔥Supports redirects, absolute links, relative links, all the things
- 🔥Configure specific regex patterns to skip
- 🔥Configure specific regex patterns to skip, and patterns to force-include even when a skip pattern matches

## Installation

Expand Down Expand Up @@ -128,6 +128,7 @@ Asynchronous method that runs a site wide scan. Options come in the form of an o
- `port` (number) - When the `path` is provided as a local path on disk, the `port` on which to start the temporary web server. Defaults to a random high range order port.
- `recurse` (boolean) - By default, all scans are shallow. Only the top level links on the requested page will be scanned. By setting `recurse` to `true`, the crawler will follow all links on the page, and continue scanning links **on the same domain** for as long as it can go. Results are cached, so no worries about loops.
- `linksToSkip` (array) - An array of regular expression strings that should be skipped during the scan.
- `linksToInclude` (array) - An array of regular expressions (strings or `RegExp` objects). A link that matches any of them is checked even if it also matches a `linksToSkip` pattern.

#### linkinator.LinkChecker()
Constructor method that can be used to create a new `LinkChecker` instance. This is particularly useful if you want to receive events as the crawler crawls. Exposes the following events:
Expand Down Expand Up @@ -211,6 +212,9 @@ async function complex() {
// 'https://jbeckwith.com/some/link',
// 'http://example.com'
// ]
// linksToInclude: [
// /important/i
// ]
});

// Check to see if the scan passed!
Expand Down
12 changes: 12 additions & 0 deletions src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ const cli = meow(
--skip, -s
List of urls in regexy form to not include in the check.

--include, -i
List of urls in regexy form to force include in the check.

--format, -f
Return the data in CSV or JSON format.

Expand All @@ -48,6 +51,7 @@ const cli = meow(
$ linkinator https://www.google.com
$ linkinator . --recurse
$ linkinator . --skip www.googleapis.com
$ linkinator . --skip www.googleapis.com --include /api/v2
$ linkinator . --format CSV
`,
{
Expand All @@ -56,6 +60,7 @@ const cli = meow(
concurrency: { type: 'string' },
recurse: { type: 'boolean', alias: 'r', default: undefined },
skip: { type: 'string', alias: 's' },
include: { type: 'string', alias: 'i' },
format: { type: 'string', alias: 'f' },
silent: { type: 'boolean', default: undefined },
},
Expand Down Expand Up @@ -115,6 +120,13 @@ async function main() {
opts.linksToSkip = flags.skip;
}
}
// Translate the --include CLI flag into the `linksToInclude` check option.
// Nothing is set when the flag is absent or empty.
if (flags.include) {
if (typeof flags.include === 'string') {
// A single string may carry several patterns separated by spaces;
// the filter drops the empty entries produced by consecutive spaces.
opts.linksToInclude = flags.include.split(' ').filter(x => !!x);
} else if (Array.isArray(flags.include)) {
// An array value is passed through unchanged.
// NOTE(review): presumably this happens when the flag is repeated on the
// command line — confirm against the CLI parser's behavior.
opts.linksToInclude = flags.include;
}
}
const result = await checker.check(opts);
log();

Expand Down
1 change: 1 addition & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export interface Flags {
config?: string;
recurse?: boolean;
skip?: string;
include?: string;
format?: string;
silent?: boolean;
}
Expand Down
14 changes: 11 additions & 3 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ export interface CheckOptions {
port?: number;
path: string;
recurse?: boolean;
linksToSkip?: string[];
linksToSkip?: Array<string | RegExp>;
linksToInclude?: Array<string | RegExp>;
}

export enum LinkState {
Expand Down Expand Up @@ -58,6 +59,8 @@ export class LinkChecker extends EventEmitter {
*/
async check(options: CheckOptions) {
options.linksToSkip = options.linksToSkip || [];
options.linksToInclude = options.linksToInclude || [];

let server: http.Server | undefined;
if (!options.path.startsWith('http')) {
const port = options.port || 5000 + Math.round(Math.random() * 1000);
Expand Down Expand Up @@ -142,9 +145,14 @@ export class LinkChecker extends EventEmitter {
.linksToSkip!.map(linkToSkip => {
return new RegExp(linkToSkip).test(opts.url.href);
})
.filter(match => !!match);
.filter(Boolean);
const forceInclude = opts.checkOptions
.linksToInclude!.map(linkToInclude => {
return new RegExp(linkToInclude).test(opts.url.href);
})
.filter(Boolean);

if (skips.length > 0) {
if (skips.length > 0 && forceInclude.length === 0) {
const result: LinkResult = {
url: opts.url.href,
state: LinkState.SKIPPED,
Expand Down
3 changes: 2 additions & 1 deletion test/fixtures/config/linkinator.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
"recurse": true,
"silent": true,
"concurrency": 17,
"skip": "🌳"
"skip": "🌳",
"include": "🚀"
}
10 changes: 10 additions & 0 deletions test/fixtures/include/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<html>
<body>
<!--
Fixture for the "force include" test, which scans this page with
linksToSkip = [/very_bad/] and linksToInclude = [/included_part/i]:
- two links match neither the skip pattern nor need forcing (plain includes),
- two links match only the skip pattern and should be skipped,
- two links match both patterns and should be checked anyway.
-->
<a href="http://fake.local/INCLUDED_part/end">we should include dis one</a>
<a href="http://fake.local/included_part">we should include dis one</a>
<a href="http://fake.local/very_bad">we should skip dis one</a>
<a href="http://fake.local/force_included_part_very_bad/for_example">we should include dis one forcely</a>
<a href="http://fake.local/very_bad/force_included_PART">we should include dis one forcely</a>
<a href="http://fake.local/something/else/very_bad">we should skip dis one</a>
</body>
</html>
24 changes: 24 additions & 0 deletions test/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,30 @@ describe('linkinator', () => {
);
});

// Verifies that `linksToInclude` takes precedence over `linksToSkip`: a link
// that matches both a skip pattern and an include pattern is checked rather
// than reported as skipped.
it('should include even skipped links if asked forcely', async () => {
// Mock a HEAD 200 for the four fixture links that are expected to be
// checked: two that do not match /very_bad/ at all, and two that match
// /very_bad/ but also match /included_part/i. The two links that match
// only /very_bad/ get no mock because they are expected to be skipped.
const scope = nock('http://fake.local')
.head('/INCLUDED_part/end')
.reply(200)
.head('/included_part')
.reply(200)
.head('/force_included_part_very_bad/for_example')
.reply(200)
.head('/very_bad/force_included_PART')
.reply(200);
const results = await check({
path: 'test/fixtures/include',
linksToSkip: [/very_bad/],
// Case-insensitive so that both `included_part` and `INCLUDED_part` /
// `included_PART` spellings in the fixture are matched.
linksToInclude: [/included_part/i],
});
assert.ok(results.passed);
// The fixture page contains 6 links; the 7th result is presumably the
// entry for the scanned page itself — TODO confirm.
assert.strictEqual(results.links.length, 7);
// Only `/very_bad` and `/something/else/very_bad` match the skip pattern
// without also matching the include pattern.
assert.strictEqual(
results.links.filter(x => x.state === LinkState.SKIPPED).length,
2
);
// Fails the test if any of the mocked HEAD requests above was never made.
scope.done();
});

it('should report broken links', async () => {
const scope = nock('http://fake.local')
.head('/')
Expand Down