3
3
const path = require ( 'path' )
4
4
const fs = require ( 'fs' )
5
5
const linkinator = require ( 'linkinator' )
6
- const dedent = require ( 'dedent' )
7
6
const program = require ( 'commander' )
8
- const { escapeRegExp } = require ( 'lodash' )
7
+ const { pull , uniq } = require ( 'lodash' )
9
8
const checker = new linkinator . LinkChecker ( )
10
9
const rimraf = require ( 'rimraf' ) . sync
10
+ const mkdirp = require ( 'mkdirp' ) . sync
11
11
const root = 'https://docs.github.com'
12
12
const englishRoot = `${ root } /en`
13
13
const { deprecated } = require ( '../lib/enterprise-server-releases' )
14
+ const got = require ( 'got' )
15
+
16
+ // Links with these codes may or may not really be broken.
17
+ const retryStatusCodes = [ 429 , 503 ]
14
18
15
19
// [start-readme]
16
20
//
17
21
// This script runs once per day via a scheduled GitHub Action to check all links in
18
22
// English content, not including deprecated Enterprise Server content. It opens an issue
19
- // if it finds broken links. To exclude a link, add it to `lib/excluded-links.js`.
23
+ // if it finds broken links. To exclude a link path, add it to `lib/excluded-links.js`.
20
24
//
21
25
// [end-readme]
22
26
23
27
program
24
28
. description ( 'Check all links in the English docs.' )
25
29
. option ( '-d, --dry-run' , 'Turn off recursion to get a fast minimal report (useful for previewing output).' )
30
+ . option ( '-p, --path <PATH>' , 'Provide an optional path to check. Best used with --dry-run. If not provided, defaults to the homepage.' )
26
31
. parse ( process . argv )
27
32
28
33
// Skip excluded links defined in separate file.
29
34
const excludedLinks = require ( '../lib/excluded-links' )
30
- . map ( link => escapeRegExp ( link ) )
31
35
32
36
// Skip non-English content.
33
37
const languagesToSkip = Object . keys ( require ( '../lib/languages' ) )
@@ -40,7 +44,7 @@ const languagesToSkip = Object.keys(require('../lib/languages'))
40
44
const enterpriseReleasesToSkip = new RegExp ( `${ root } .+?[/@](${ deprecated . join ( '|' ) } )/` )
41
45
42
46
const config = {
43
- path : englishRoot ,
47
+ path : program . path || englishRoot ,
44
48
concurrency : 300 ,
45
49
// If this is a dry run, turn off recursion.
46
50
recurse : ! program . dryRun ,
@@ -56,40 +60,74 @@ const config = {
56
60
main ( )
57
61
58
62
// Entry point: scan the configured path, retry questionable failures once,
// then report any remaining broken links and exit with a matching status code.
async function main () {
  // Start every run from an empty log directory.
  const logFile = path.join(__dirname, '../.linkinator/full.log')
  const logDirectory = path.dirname(logFile)
  rimraf(logDirectory)
  mkdirp(logDirectory)

  // Append each checked link to the logfile as one JSON document per line.
  checker.on('link', (checkedLink) => {
    fs.appendFileSync(logFile, `${JSON.stringify(checkedLink)}\n`)
  })

  // Run the scan; the listener above records results while it is in progress.
  const { links: result } = await checker.check(config)

  // Only links the checker flagged as broken are of interest from here on.
  const brokenLinks = result.filter(({ state }) => state === 'BROKEN')

  // A missing status, or one listed in retryStatusCodes, does not prove the
  // link is broken, so each of those gets one individual follow-up request.
  const linksToRetry = brokenLinks.filter(({ status }) => {
    return !status || retryStatusCodes.includes(status)
  })

  const retryAttempts = linksToRetry.map(async (link) => {
    try {
      // got throws an HTTPError if the response code is not 2xx or 3xx, so
      // reaching the next line means the link works and can be dropped.
      await got(link.url)
      pull(brokenLinks, link)
    } catch (err) {
      // Deliberate no-op: the link simply stays in the broken list.
    }
  })
  await Promise.all(retryAttempts)

  // Nothing left to report: exit successfully.
  if (brokenLinks.length === 0) {
    console.log('All links are good!')
    process.exit(0)
  }

  // Otherwise print the formatted report and exit unsuccessfully.
  console.log(`${brokenLinks.length} broken links found on docs.github.com\n`)
  displayBrokenLinks(brokenLinks)
  process.exit(1)
}
110
+
111
/**
 * Print the broken links to stdout, grouped by status code.
 *
 * Each group is printed as a `## Status <code>` heading followed by a fenced
 * block containing one pretty-printed JSON document per link. Groups appear in
 * the order their status code is first seen in `brokenLinks`; nothing is
 * sorted.
 *
 * @param {Array<Object>} brokenLinks - Link result objects. Only `status` is
 *   read here; every other property is passed through to the JSON output.
 *   The objects and the array itself are not modified.
 * @returns {void}
 */
function displayBrokenLinks (brokenLinks) {
  // Group the links in a single pass. A Map iterates in insertion order, so
  // groups come out in first-seen order.
  const linksByStatus = new Map()

  for (const link of brokenLinks) {
    // A falsy status (undefined or 0) is not useful output, so it is shown as
    // `Invalid`. The relabelling is done on a copy so the caller's objects are
    // left untouched.
    const displayedLink = link.status
      ? link
      : Object.assign({}, link, { status: 'Invalid' })

    const group = linksByStatus.get(displayedLink.status)
    if (group) {
      group.push(displayedLink)
    } else {
      linksByStatus.set(displayedLink.status, [displayedLink])
    }
  }

  for (const [statusCode, linksForStatus] of linksByStatus) {
    console.log(`## Status ${statusCode}: Found ${linksForStatus.length} broken links`)
    console.log('```')
    for (const brokenLinkObj of linksForStatus) {
      console.log(JSON.stringify(brokenLinkObj, null, 2))
    }
    console.log('```')
  }
}
0 commit comments