Skip to content

repo sync #1479

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Nov 20, 2020
6 changes: 3 additions & 3 deletions lib/excluded-links.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// Linkinator treats the following as regex.
module.exports = [
// Skip GitHub search links.
'https://github.com/search?.*',
'https://github.com/github/gitignore/search?',
'https://github.com/search\\?',
'https://github.com/github/gitignore/search\\?',

// These links require auth.
'https://github.com/settings/profile',
Expand All @@ -15,6 +15,6 @@ module.exports = [

// One-off links that link checkers think are broken but are not.
'https://haveibeenpwned.com/',
'https://www.ilo.org/dyn/normlex/en/f?p=NORMLEXPUB:12100:0::NO::P12100_ILO_CODE:P029',
'https://www.ilo.org/dyn/normlex/en/f\\?p=NORMLEXPUB:12100:0::NO::P12100_ILO_CODE:P029',
'http://www.w3.org/wiki/LinkHeader/'
]
96 changes: 67 additions & 29 deletions script/check-english-links.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,35 @@
const path = require('path')
const fs = require('fs')
const linkinator = require('linkinator')
const dedent = require('dedent')
const program = require('commander')
const { escapeRegExp } = require('lodash')
const { pull, uniq } = require('lodash')
const checker = new linkinator.LinkChecker()
const rimraf = require('rimraf').sync
const mkdirp = require('mkdirp').sync
const root = 'https://docs.github.com'
const englishRoot = `${root}/en`
const { deprecated } = require('../lib/enterprise-server-releases')
const got = require('got')

// Links with these codes may or may not really be broken.
const retryStatusCodes = [429, 503]

// [start-readme]
//
// This script runs once per day via a scheduled GitHub Action to check all links in
// English content, not including deprecated Enterprise Server content. It opens an issue
// if it finds broken links. To exclude a link, add it to `lib/excluded-links.js`.
// if it finds broken links. To exclude a link path, add it to `lib/excluded-links.js`.
//
// [end-readme]

program
.description('Check all links in the English docs.')
.option('-d, --dry-run', 'Turn off recursion to get a fast minimal report (useful for previewing output).')
.option('-p, --path <PATH>', 'Provide an optional path to check. Best used with --dry-run. If not provided, defaults to the homepage.')
.parse(process.argv)

// Skip excluded links defined in separate file.
const excludedLinks = require('../lib/excluded-links')
.map(link => escapeRegExp(link))

// Skip non-English content.
const languagesToSkip = Object.keys(require('../lib/languages'))
Expand All @@ -40,7 +44,7 @@ const languagesToSkip = Object.keys(require('../lib/languages'))
const enterpriseReleasesToSkip = new RegExp(`${root}.+?[/@](${deprecated.join('|')})/`)

const config = {
path: englishRoot,
path: program.path || englishRoot,
concurrency: 300,
// If this is a dry run, turn off recursion.
recurse: !program.dryRun,
Expand All @@ -56,40 +60,74 @@ const config = {
main()

async function main () {
const startTime = new Date()

// Clear and recreate a directory for logs.
const logFile = path.join(__dirname, '../.linkinator/full.log')
rimraf(path.dirname(logFile))
fs.mkdirSync(path.dirname(logFile), { recursive: true })
mkdirp(path.dirname(logFile))

// Append each link result to the logfile as soon as it has been checked.
checker.on('link', result => {
fs.appendFileSync(logFile, JSON.stringify(result) + '\n')
})

// Start the scan; events will be logged as they occur.
const result = await checker.check(config)

// Scan is complete! Display the results.
const endTime = new Date()
const skippedLinks = result.links.filter(x => x.state === 'SKIPPED')
const brokenLinks = result.links.filter(x => x.state === 'BROKEN')

console.log(dedent`
${brokenLinks.length} broken links found on docs.github.com

Link scan completed in ${endTime - startTime}ms
Total links: ${result.links.length}
Skipped links: ${skippedLinks.length}
Broken links: ${brokenLinks.length}
For more details see ${path.relative(process.cwd(), logFile)}
`)

if (brokenLinks.length) {
console.log('\n\n' + JSON.stringify(brokenLinks, null, 2))
process.exit(1)
const result = (await checker.check(config)).links

// Scan is complete! Filter the results for broken links.
const brokenLinks = result
.filter(link => link.state === 'BROKEN')

// Links to retry individually.
const linksToRetry = brokenLinks
.filter(link => !link.status || retryStatusCodes.includes(link.status))

await Promise.all(linksToRetry
.map(async (link) => {
try {
// got throws an HTTPError if response code is not 2xx or 3xx.
// If got succeeds, we can remove the link from the list.
await got(link.url)
pull(brokenLinks, link)
// If got fails, do nothing. The link is already in the broken list.
} catch (err) {
// noop
}
}))

// Exit successfully if no broken links!
if (!brokenLinks.length) {
console.log('All links are good!')
process.exit(0)
}

process.exit(0)
// Format and display the results.
console.log(`${brokenLinks.length} broken links found on docs.github.com\n`)
displayBrokenLinks(brokenLinks)

// Exit unsuccessfully if broken links are found.
process.exit(1)
}

/**
 * Print the broken links to stdout, grouped by status code.
 *
 * Each group is written as a `## Status <code>: Found <n> broken links` heading
 * followed by a fenced block containing the pretty-printed JSON of every link in
 * that group. Groups appear in the order their status code is first seen in
 * `brokenLinks`, and links keep their original relative order within a group.
 *
 * @param {Array<Object>} brokenLinks - Link result objects. Only `status` is read
 *   here; the objects themselves are not modified.
 * @returns {void}
 */
function displayBrokenLinks (brokenLinks) {
  // Group the links by status in a single pass. A Map keeps insertion order, so
  // the groups are printed in first-seen order.
  const linksByStatus = new Map()

  brokenLinks.forEach(link => {
    // Links with a falsy status (undefined or 0) are displayed as `Invalid`, which is
    // more useful output than a missing or zero code. The substitution is made on a
    // copy so the caller's objects are left untouched.
    const displayLink = link.status
      ? link
      : Object.assign({}, link, { status: 'Invalid' })

    if (!linksByStatus.has(displayLink.status)) {
      linksByStatus.set(displayLink.status, [])
    }
    linksByStatus.get(displayLink.status).push(displayLink)
  })

  linksByStatus.forEach((linksForStatus, statusCode) => {
    console.log(`## Status ${statusCode}: Found ${linksForStatus.length} broken links`)
    console.log('```')
    linksForStatus.forEach(brokenLinkObj => {
      console.log(JSON.stringify(brokenLinkObj, null, 2))
    })
    console.log('```')
  })
}