Skip to content

Commit c9f8df9

Browse files
authored
repo sync
2 parents 2df63db + d469ed2 commit c9f8df9

File tree

2 files changed

+70
-32
lines changed

2 files changed

+70
-32
lines changed

lib/excluded-links.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
// Linkinator treats the following as regex.
22
module.exports = [
33
// Skip GitHub search links.
4-
'https://github.com/search?.*',
5-
'https://github.com/github/gitignore/search?',
4+
'https://github.com/search\\?',
5+
'https://github.com/github/gitignore/search\\?',
66

77
// These links require auth.
88
'https://github.com/settings/profile',
@@ -15,6 +15,6 @@ module.exports = [
1515

1616
// Oneoff links that link checkers think are broken but are not.
1717
'https://haveibeenpwned.com/',
18-
'https://www.ilo.org/dyn/normlex/en/f?p=NORMLEXPUB:12100:0::NO::P12100_ILO_CODE:P029',
18+
'https://www.ilo.org/dyn/normlex/en/f\\?p=NORMLEXPUB:12100:0::NO::P12100_ILO_CODE:P029',
1919
'http://www.w3.org/wiki/LinkHeader/'
2020
]

script/check-english-links.js

Lines changed: 67 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3,31 +3,35 @@
33
const path = require('path')
44
const fs = require('fs')
55
const linkinator = require('linkinator')
6-
const dedent = require('dedent')
76
const program = require('commander')
8-
const { escapeRegExp } = require('lodash')
7+
const { pull, uniq } = require('lodash')
98
const checker = new linkinator.LinkChecker()
109
const rimraf = require('rimraf').sync
10+
const mkdirp = require('mkdirp').sync
1111
const root = 'https://docs.github.com'
1212
const englishRoot = `${root}/en`
1313
const { deprecated } = require('../lib/enterprise-server-releases')
14+
const got = require('got')
15+
16+
// Links that return these codes (429 Too Many Requests, 503 Service Unavailable) may not really be broken, so they are retried individually.
17+
const retryStatusCodes = [429, 503]
1418

1519
// [start-readme]
1620
//
1721
// This script runs once per day via a scheduled GitHub Action to check all links in
1822
// English content, not including deprecated Enterprise Server content. It opens an issue
19-
// if it finds broken links. To exclude a link, add it to `lib/excluded-links.js`.
23+
// if it finds broken links. To exclude a link path, add it to `lib/excluded-links.js`.
2024
//
2125
// [end-readme]
2226

2327
program
2428
.description('Check all links in the English docs.')
2529
.option('-d, --dry-run', 'Turn off recursion to get a fast minimal report (useful for previewing output).')
30+
.option('-p, --path <PATH>', 'Provide an optional path to check. Best used with --dry-run. If not provided, defaults to the homepage.')
2631
.parse(process.argv)
2732

2833
// Skip excluded links defined in separate file.
2934
const excludedLinks = require('../lib/excluded-links')
30-
.map(link => escapeRegExp(link))
3135

3236
// Skip non-English content.
3337
const languagesToSkip = Object.keys(require('../lib/languages'))
@@ -40,7 +44,7 @@ const languagesToSkip = Object.keys(require('../lib/languages'))
4044
const enterpriseReleasesToSkip = new RegExp(`${root}.+?[/@](${deprecated.join('|')})/`)
4145

4246
const config = {
43-
path: englishRoot,
47+
path: program.path || englishRoot,
4448
concurrency: 300,
4549
// If this is a dry run, turn off recursion.
4650
recurse: !program.dryRun,
@@ -56,40 +60,74 @@ const config = {
5660
main()
5761

5862
async function main () {
59-
const startTime = new Date()
60-
6163
// Clear and recreate a directory for logs.
6264
const logFile = path.join(__dirname, '../.linkinator/full.log')
6365
rimraf(path.dirname(logFile))
64-
fs.mkdirSync(path.dirname(logFile), { recursive: true })
66+
mkdirp(path.dirname(logFile))
6567

6668
// Update CLI output and append to logfile after each checked link.
6769
checker.on('link', result => {
6870
fs.appendFileSync(logFile, JSON.stringify(result) + '\n')
6971
})
7072

7173
// Start the scan; events will be logged as they occur.
72-
const result = await checker.check(config)
73-
74-
// Scan is complete! Display the results.
75-
const endTime = new Date()
76-
const skippedLinks = result.links.filter(x => x.state === 'SKIPPED')
77-
const brokenLinks = result.links.filter(x => x.state === 'BROKEN')
78-
79-
console.log(dedent`
80-
${brokenLinks.length} broken links found on docs.github.com
81-
82-
Link scan completed in ${endTime - startTime}ms
83-
Total links: ${result.links.length}
84-
Skipped links: ${skippedLinks.length}
85-
Broken links: ${brokenLinks.length}
86-
For more details see ${path.relative(process.cwd(), logFile)}
87-
`)
88-
89-
if (brokenLinks.length) {
90-
console.log('\n\n' + JSON.stringify(brokenLinks, null, 2))
91-
process.exit(1)
74+
const result = (await checker.check(config)).links
75+
76+
// Scan is complete! Filter the results for broken links.
77+
const brokenLinks = result
78+
.filter(link => link.state === 'BROKEN')
79+
80+
// Links to retry individually.
81+
const linksToRetry = brokenLinks
82+
.filter(link => !link.status || retryStatusCodes.includes(link.status))
83+
84+
await Promise.all(linksToRetry
85+
.map(async (link) => {
86+
try {
87+
// got throws an HTTPError if response code is not 2xx or 3xx.
88+
// If got succeeds, we can remove the link from the list.
89+
await got(link.url)
90+
pull(brokenLinks, link)
91+
// If got fails, do nothing. The link is already in the broken list.
92+
} catch (err) {
93+
// noop
94+
}
95+
}))
96+
97+
// Exit successfully if no broken links!
98+
if (!brokenLinks.length) {
99+
console.log('All links are good!')
100+
process.exit(0)
92101
}
93102

94-
process.exit(0)
103+
// Format and display the results.
104+
console.log(`${brokenLinks.length} broken links found on docs.github.com\n`)
105+
displayBrokenLinks(brokenLinks)
106+
107+
// Exit unsuccessfully if broken links are found.
108+
process.exit(1)
109+
}
110+
111+
/**
 * Print the broken links to stdout, grouped by status code.
 *
 * For each distinct status (in the order it first appears in `brokenLinks`)
 * this logs a `## Status <code>` heading followed by a fenced block that holds
 * the pretty-printed JSON of every link with that status.
 *
 * @param {Array<Object>} brokenLinks - Link results. Only the `status` property
 *   of each item is inspected; the array and its items are left unmodified.
 * @returns {void}
 */
function displayBrokenLinks (brokenLinks) {
  // Group the links by status code in a single pass.
  // A Map keeps the groups in first-seen order, so nothing is actually sorted.
  const linksByStatus = new Map()

  brokenLinks.forEach(link => {
    // A falsy status (undefined or 0) is not useful output, so display it as `Invalid`.
    // Work on a copy so the caller's link objects are not mutated.
    const displayLink = link.status
      ? link
      : Object.assign({}, link, { status: 'Invalid' })

    if (!linksByStatus.has(displayLink.status)) {
      linksByStatus.set(displayLink.status, [])
    }
    linksByStatus.get(displayLink.status).push(displayLink)
  })

  linksByStatus.forEach((linksForStatus, statusCode) => {
    console.log(`## Status ${statusCode}: Found ${linksForStatus.length} broken links`)
    console.log('```')
    linksForStatus.forEach(brokenLinkObj => {
      console.log(JSON.stringify(brokenLinkObj, null, 2))
    })
    console.log('```')
  })
}

0 commit comments

Comments
 (0)