1717//! should catch the majority of "broken link" cases. 
1818
1919use  std:: cell:: { Cell ,  RefCell } ; 
20+ use  std:: collections:: hash_map:: Entry ; 
2021use  std:: collections:: { HashMap ,  HashSet } ; 
21- use  std:: io:: ErrorKind ; 
22+ use  std:: fs; 
23+ use  std:: iter:: once; 
2224use  std:: path:: { Component ,  Path ,  PathBuf } ; 
2325use  std:: rc:: Rc ; 
2426use  std:: time:: Instant ; 
25- use  std:: { env,  fs} ; 
2627
2728use  html5ever:: tendril:: ByteTendril ; 
2829use  html5ever:: tokenizer:: { 
@@ -110,10 +111,25 @@ macro_rules! t {
110111    } ; 
111112} 
112113
/// Command-line configuration produced by `parse_cli`.
#[derive(Debug)]
struct Cli {
    /// Absolute path of the documentation tree to check (the single positional argument).
    docs: PathBuf,
    /// Absolute paths given via `--link-targets-dir`, in the order they appeared on the
    /// command line.
    link_targets_dirs: Vec<PathBuf>,
}
118+ 
113119fn  main ( )  { 
114-     let  docs = env:: args_os ( ) . nth ( 1 ) . expect ( "doc path should be first argument" ) ; 
115-     let  docs = env:: current_dir ( ) . unwrap ( ) . join ( docs) ; 
116-     let  mut  checker = Checker  {  root :  docs. clone ( ) ,  cache :  HashMap :: new ( )  } ; 
120+     let  cli = match  parse_cli ( )  { 
121+         Ok ( cli)  => cli, 
122+         Err ( err)  => { 
123+             eprintln ! ( "error: {err}" ) ; 
124+             usage_and_exit ( 1 ) ; 
125+         } 
126+     } ; 
127+ 
128+     let  mut  checker = Checker  { 
129+         root :  cli. docs . clone ( ) , 
130+         link_targets_dirs :  cli. link_targets_dirs , 
131+         cache :  HashMap :: new ( ) , 
132+     } ; 
117133    let  mut  report = Report  { 
118134        errors :  0 , 
119135        start :  Instant :: now ( ) , 
@@ -125,16 +141,58 @@ fn main() {
125141        intra_doc_exceptions :  0 , 
126142        has_broken_urls :  false , 
127143    } ; 
128-     checker. walk ( & docs,  & mut  report) ; 
144+     checker. walk ( & cli . docs ,  & mut  report) ; 
129145    report. report ( ) ; 
130146    if  report. errors  != 0  { 
131147        println ! ( "found some broken links" ) ; 
132148        std:: process:: exit ( 1 ) ; 
133149    } 
134150} 
135151
152+ fn  parse_cli ( )  -> Result < Cli ,  String >  { 
153+     fn  to_absolute_path ( arg :  & str )  -> Result < PathBuf ,  String >  { 
154+         std:: path:: absolute ( arg) . map_err ( |e| format ! ( "could not convert to absolute {arg}: {e}" ) ) 
155+     } 
156+ 
157+     let  mut  verbatim = false ; 
158+     let  mut  docs = None ; 
159+     let  mut  link_targets_dirs = Vec :: new ( ) ; 
160+ 
161+     let  mut  args = std:: env:: args ( ) . skip ( 1 ) ; 
162+     while  let  Some ( arg)  = args. next ( )  { 
163+         if  !verbatim && arg == "--"  { 
164+             verbatim = true ; 
165+         }  else  if  !verbatim && ( arg == "-h"  || arg == "--help" )  { 
166+             usage_and_exit ( 0 ) 
167+         }  else  if  !verbatim && arg == "--link-targets-dir"  { 
168+             link_targets_dirs. push ( to_absolute_path ( 
169+                 & args. next ( ) . ok_or ( "missing value for --link-targets-dir" ) ?, 
170+             ) ?) ; 
171+         }  else  if  !verbatim && let  Some ( value)  = arg. strip_prefix ( "--link-targets-dir=" )  { 
172+             link_targets_dirs. push ( to_absolute_path ( value) ?) ; 
173+         }  else  if  !verbatim && arg. starts_with ( '-' )  { 
174+             return  Err ( format ! ( "unknown flag: {arg}" ) ) ; 
175+         }  else  if  docs. is_none ( )  { 
176+             docs = Some ( arg) ; 
177+         }  else  { 
178+             return  Err ( "too many positional arguments" . into ( ) ) ; 
179+         } 
180+     } 
181+ 
182+     Ok ( Cli  { 
183+         docs :  to_absolute_path ( & docs. ok_or ( "missing first positional argument" ) ?) ?, 
184+         link_targets_dirs, 
185+     } ) 
186+ } 
187+ 
/// Prints the one-line usage summary to stderr and terminates the process with `code`.
fn usage_and_exit(code: i32) -> ! {
    const USAGE: &str = "usage: linkchecker PATH [--link-targets-dir=PATH ...]";

    eprintln!("{USAGE}");
    std::process::exit(code)
}
192+ 
136193struct  Checker  { 
137194    root :  PathBuf , 
195+     link_targets_dirs :  Vec < PathBuf > , 
138196    cache :  Cache , 
139197} 
140198
@@ -420,37 +478,34 @@ impl Checker {
420478
421479    /// Load a file from disk, or from the cache if available. 
422480fn  load_file ( & mut  self ,  file :  & Path ,  report :  & mut  Report )  -> ( String ,  & FileEntry )  { 
423-         // https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499- 
424-         #[ cfg( windows) ]  
425-         const  ERROR_INVALID_NAME :  i32  = 123 ; 
426- 
427481        let  pretty_path =
428482            file. strip_prefix ( & self . root ) . unwrap_or ( file) . to_str ( ) . unwrap ( ) . to_string ( ) ; 
429483
430-         let  entry =
431-             self . cache . entry ( pretty_path. clone ( ) ) . or_insert_with ( || match  fs:: metadata ( file)  { 
484+         for  base in  once ( & self . root ) . chain ( self . link_targets_dirs . iter ( ) )  { 
485+             let  entry = self . cache . entry ( pretty_path. clone ( ) ) ; 
486+             if  let  Entry :: Occupied ( e)  = & entry
487+                 && !matches ! ( e. get( ) ,  FileEntry :: Missing ) 
488+             { 
489+                 break ; 
490+             } 
491+ 
492+             let  file = base. join ( & pretty_path) ; 
493+             entry. insert_entry ( match  fs:: metadata ( & file)  { 
432494                Ok ( metadata)  if  metadata. is_dir ( )  => FileEntry :: Dir , 
433495                Ok ( _)  => { 
434496                    if  file. extension ( ) . and_then ( |s| s. to_str ( ) )  != Some ( "html" )  { 
435497                        FileEntry :: OtherFile 
436498                    }  else  { 
437499                        report. html_files  += 1 ; 
438-                         load_html_file ( file,  report) 
500+                         load_html_file ( & file,  report) 
439501                    } 
440502                } 
441-                 Err ( e)  if  e. kind ( )  == ErrorKind :: NotFound  => FileEntry :: Missing , 
442-                 Err ( e)  => { 
443-                     // If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME` rather than `NotFound`. 
444-                     // Explicitly check for that so that the broken link can be allowed in `LINKCHECK_EXCEPTIONS`. 
445-                     #[ cfg( windows) ]  
446-                     if  e. raw_os_error ( )  == Some ( ERROR_INVALID_NAME ) 
447-                         && file. as_os_str ( ) . to_str ( ) . map_or ( false ,  |s| s. contains ( "::" ) ) 
448-                     { 
449-                         return  FileEntry :: Missing ; 
450-                     } 
451-                     panic ! ( "unexpected read error for {}: {}" ,  file. display( ) ,  e) ; 
452-                 } 
503+                 Err ( e)  if  is_not_found_error ( & file,  & e)  => FileEntry :: Missing , 
504+                 Err ( e)  => panic ! ( "unexpected read error for {}: {}" ,  file. display( ) ,  e) , 
453505            } ) ; 
506+         } 
507+ 
508+         let  entry = self . cache . get ( & pretty_path) . unwrap ( ) ; 
454509        ( pretty_path,  entry) 
455510    } 
456511} 
@@ -629,3 +684,16 @@ fn parse_ids(ids: &mut HashSet<String>, file: &str, source: &str, report: &mut R
629684        ids. insert ( encoded) ; 
630685    } 
631686} 
687+ 
/// Reports whether `error`, raised while accessing `path`, means that the path does not exist.
///
/// Returns `true` for `ErrorKind::NotFound`. On Windows it also returns `true` for the OS
/// error `ERROR_INVALID_NAME` when `path` contains `::`.
fn is_not_found_error(path: &Path, error: &std::io::Error) -> bool {
    // https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
    const WINDOWS_ERROR_INVALID_NAME: i32 = 123;

    if error.kind() == std::io::ErrorKind::NotFound {
        return true;
    }

    // If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME`
    // rather than `NotFound`. Explicitly check for that so that the broken link can be allowed
    // in `LINKCHECK_EXCEPTIONS`.
    cfg!(windows)
        && error.raw_os_error() == Some(WINDOWS_ERROR_INVALID_NAME)
        && path.to_str().is_some_and(|s| s.contains("::"))
}
0 commit comments