Skip to content

Commit

Permalink
Replace check-links w/ dead-or-alive
Browse files Browse the repository at this point in the history
  • Loading branch information
wooorm committed Apr 10, 2024
1 parent c78eb47 commit 56b572a
Show file tree
Hide file tree
Showing 3 changed files with 445 additions and 220 deletions.
210 changes: 150 additions & 60 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,82 +1,172 @@
/**
* @typedef {import('mdast').Nodes} Nodes
* @typedef {import('mdast').Resource} Resource
* @typedef {import('mdast').Root} Root
* @typedef {import('vfile').VFile} VFile
*/

/**
* @typedef Options
* Configuration.
* @property {string | null | undefined} [from]
* Check relative values relative to this URL (optional, example:
* `'https://example.com/from'`).
* @property {boolean | null | undefined} [skipLocalhost=false]
* Whether to ignore `localhost` links such as `http://localhost/*`,
* `http://127.0.0.1/*` (default: `false`).
* @property {boolean | null | undefined} [skipOffline=false]
* Whether to let offline runs pass quietly (default: `false`).
* @property {Array<RegExp | string> | null | undefined} [skipUrlPatterns]
* List of patterns for URLs that should be skipped (optional);
* each URL will be tested against each pattern and will be ignored if
* `new RegExp(pattern).test(url) === true`.
*
* @typedef {Extract<Nodes, Resource>} Resources
* Resource nodes.
*/

import {ok as assert} from 'devlop'
import {deadOrAlive} from 'dead-or-alive'
import isOnline from 'is-online'
import {lintRule} from 'unified-lint-rule'
import {visit} from 'unist-util-visit'
import checkLinks from 'check-links'
import isOnline from 'is-online'

const remarkLintNoDeadUrls = lintRule('remark-lint:no-dead-urls', rule)

const defaultSkipUrlPatterns = [/^(?!https?)/i]

export default remarkLintNoDeadUrls

/**
* @typedef {import('mdast').Root} Root
* @typedef {import('mdast').Link} Link
* @typedef {import('mdast').Image} Image
* @typedef {import('mdast').Definition} Definition
* Check URLs.
*
* @typedef {Object} Options
* @property {import('got').OptionsOfTextResponseBody} [gotOptions]
* @property {boolean} [skipLocalhost]
* @property {boolean} [skipOffline]
* @property {Array<string | RegExp>} [skipUrlPatterns]
* @param {Root} tree
* Tree.
* @param {VFile} file
* File.
* @param {Readonly<Options> | null | undefined} [options]
* Configuration (optional).
* @returns {Promise<undefined>}
* Nothing.
*/
async function rule(tree, file, options) {
/** @type {Map<string, Array<Resources>>} */
const nodesByUrl = new Map()
const online = await isOnline()
const settings = options || {}
const skipUrlPatterns = settings.skipUrlPatterns
? settings.skipUrlPatterns.map((d) =>
typeof d === 'string' ? new RegExp(d) : d
)
: [...defaultSkipUrlPatterns]

/** @type {import('unified-lint-rule').Rule<Root, Options>} */
function noDeadUrls(ast, file, options) {
/** @type {{[url: string]: Array<Link | Image | Definition>}} */
const urlToNodes = {}

visit(ast, ['link', 'image', 'definition'], (node) => {
const url = /** @type {Link | Image | Definition} */ (node).url
if (
options.skipLocalhost &&
/^(https?:\/\/)(localhost|127\.0\.0\.1)(:\d+)?/.test(url)
) {
return
}
// When `skipLocalhost` is set, also ignore `localhost` / `127.0.0.1` URLs.
// The pattern is added to this run's own `skipUrlPatterns` list, not to the
// module-level `defaultSkipUrlPatterns`: that array is shared between runs
// and has already been copied at this point, so pushing to it would not
// affect the current run and would grow the shared array on every call.
// There is deliberately no early `return` here: the remaining URLs must
// still be collected and checked.
if (settings.skipLocalhost) {
  skipUrlPatterns.push(/^(https?:\/\/)(localhost|127\.0\.0\.1)(:\d+)?/)
}

if (
options.skipUrlPatterns &&
options.skipUrlPatterns.some((skipPattern) =>
new RegExp(skipPattern).test(url)
)
) {
return
/* c8 ignore next 8 -- difficult to test */
if (!online) {
if (!settings.skipOffline) {
// To do: clean message.
file.message('You are not online and have not set skipOffline: true.')
}

if (!urlToNodes[url]) {
urlToNodes[url] = []
}
return
}

urlToNodes[url].push(/** @type {Link | Image | Definition} */ (node))
})
const meta = /** @type {Record<string, unknown> | undefined} */ (
file.data.meta
)

return checkLinks(Object.keys(urlToNodes), options.gotOptions).then(
(results) => {
for (const url of Object.keys(results)) {
const result = results[url]
if (result.status !== 'dead') continue
const from =
settings.from ||
(meta &&
typeof meta.origin === 'string' &&
typeof meta.pathname === 'string'
? new URL(meta.pathname, meta.origin).href
: undefined)

const nodes = urlToNodes[url]
visit(tree, function (node) {
if ('url' in node && typeof node.url === 'string') {
const value = node.url
const colon = value.indexOf(':')
const questionMark = value.indexOf('?')
const numberSign = value.indexOf('#')
const slash = value.indexOf('/')
let relativeToSomething = false

for (const node of nodes) {
file.message(`Link to ${url} is dead`, node)
}
if (
// If there is no protocol, it’s relative.
colon < 0 ||
// If the first colon is after a `?`, `#`, or `/`, it’s not a protocol.
(slash > -1 && colon > slash) ||
(questionMark > -1 && colon > questionMark) ||
(numberSign > -1 && colon > numberSign)
) {
relativeToSomething = true
}
}
)
}

/** @type {import('unified-lint-rule').Rule<Root, Options>} */
function wrapper(ast, file, options = {}) {
return isOnline().then((online) => {
if (!online) {
if (!options.skipOffline) {
file.message('You are not online and have not set skipOffline: true.')
// We can only check URLs relative to something if `from` is passed.
if (relativeToSomething && !from) {
return
}

return
}
const url = new URL(value, from).href

if (skipUrlPatterns.some((skipPattern) => skipPattern.test(url))) {
return
}

let list = nodesByUrl.get(url)

if (!list) {
list = []
nodesByUrl.set(url, list)
}

return noDeadUrls(ast, file, options)
list.push(node)
}
})
}

const remarkLintNoDeadLinks = lintRule('remark-lint:no-dead-urls', wrapper)
const urls = [...nodesByUrl.keys()]

await Promise.all(
urls.map(async function (url) {
const nodes = nodesByUrl.get(url)
assert(nodes)
const result = await deadOrAlive(url, {
findUrls: false
// To do:
// * `anchorAllowlist`
// * `checkAnchor`
// * `followMetaHttpEquiv`
// * `maxRedirects`
// * `maxRetries`
// * `resolveClobberPrefix`
// * `sleep`
// * `timeout`
// * `userAgent`
})

for (const node of nodes) {
for (const message of result.messages) {
// To do: enclose url in backticks.
const copy = file.message('Link to ' + url + ' is dead', {
cause: message,
place: node.position
})

copy.fatal = message.fatal
}

export default remarkLintNoDeadLinks
if (result.status === 'alive' && new URL(url).href !== result.url) {
// To do: clean message.
file.message('Link to ' + url + ' redirects to ' + result.url, {
place: node.position
})
}
}
})
)
}
17 changes: 10 additions & 7 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,30 +31,33 @@
],
"dependencies": {
"@types/mdast": "^4.0.0",
"check-links": "^2.0.0",
"dead-or-alive": "^1.0.0",
"devlop": "^1.0.0",
"is-online": "^10.0.0",
"unified-lint-rule": "^2.0.0",
"unist-util-visit": "^5.0.0"
"unified-lint-rule": "^3.0.0",
"unist-util-visit": "^5.0.0",
"vfile": "^6.0.0",
"vfile-message": "^4.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"c8": "^8.0.0",
"esmock": "^2.0.0",
"prettier": "^3.0.0",
"remark": "^15.0.0",
"remark-cli": "^12.0.0",
"remark-gfm": "^4.0.0",
"remark-preset-wooorm": "^9.0.0",
"remark-preset-wooorm": "^10.0.0",
"type-coverage": "^2.0.0",
"typescript": "^5.0.0",
"undici": "^6.0.0",
"vfile-sort": "^4.0.0",
"xo": "^0.56.0"
},
"scripts": {
"build": "tsc --build --clean && tsc --build && type-coverage",
"format": "remark . --frail --output --quiet && prettier . --log-level warn --write && xo --fix",
"prepack": "npm run build && npm run format",
"test": "npm run build && npm run format && npm run test-coverage",
"test-api": "node --conditions development --loader=esmock test.js",
"test-api": "node --conditions development test.js",
"test-coverage": "c8 --100 --reporter lcov npm run test-api"
},
"prettier": {
Expand Down
Loading

0 comments on commit 56b572a

Please sign in to comment.