1717//! should catch the majority of "broken link" cases.
1818
1919use std:: cell:: { Cell , RefCell } ;
20+ use std:: collections:: hash_map:: Entry ;
2021use std:: collections:: { HashMap , HashSet } ;
2122use std:: fs;
22- use std:: io :: ErrorKind ;
23+ use std:: iter :: once ;
2324use std:: path:: { Component , Path , PathBuf } ;
2425use std:: rc:: Rc ;
2526use std:: time:: Instant ;
@@ -112,6 +113,7 @@ macro_rules! t {
112113
/// Command-line options as produced by `parse_cli`.
struct Cli {
    /// Directory to check: the first positional argument, converted to an absolute path.
    docs: PathBuf,
    /// Values of every `--link-targets-dir` flag, each converted to an absolute path.
    link_targets_dirs: Vec<PathBuf>,
}
116118
117119fn main ( ) {
@@ -123,7 +125,11 @@ fn main() {
123125 }
124126 } ;
125127
126- let mut checker = Checker { root : cli. docs . clone ( ) , cache : HashMap :: new ( ) } ;
128+ let mut checker = Checker {
129+ root : cli. docs . clone ( ) ,
130+ link_targets_dirs : cli. link_targets_dirs ,
131+ cache : HashMap :: new ( ) ,
132+ } ;
127133 let mut report = Report {
128134 errors : 0 ,
129135 start : Instant :: now ( ) ,
@@ -144,19 +150,26 @@ fn main() {
144150}
145151
146152fn parse_cli ( ) -> Result < Cli , String > {
/// Converts a command-line path argument into an absolute path via
/// `std::path::absolute`, turning any failure into a human-readable message.
fn to_absolute_path(arg: &str) -> Result<PathBuf, String> {
    match std::path::absolute(arg) {
        Ok(absolute) => Ok(absolute),
        Err(e) => Err(format!("could not convert to absolute {arg}: {e}")),
    }
}
150156
151157 let mut verbatim = false ;
152158 let mut docs = None ;
159+ let mut link_targets_dirs = Vec :: new ( ) ;
153160
154161 let mut args = std:: env:: args ( ) . skip ( 1 ) ;
155162 while let Some ( arg) = args. next ( ) {
156163 if !verbatim && arg == "--" {
157164 verbatim = true ;
158165 } else if !verbatim && ( arg == "-h" || arg == "--help" ) {
159166 usage_and_exit ( 0 )
167+ } else if !verbatim && arg == "--link-targets-dir" {
168+ link_targets_dirs. push ( to_absolute_path (
169+ & args. next ( ) . ok_or ( "missing value for --link-targets-dir" ) ?,
170+ ) ?) ;
171+ } else if !verbatim && let Some ( value) = arg. strip_prefix ( "--link-targets-dir=" ) {
172+ link_targets_dirs. push ( to_absolute_path ( value) ?) ;
160173 } else if !verbatim && arg. starts_with ( '-' ) {
161174 return Err ( format ! ( "unknown flag: {arg}" ) ) ;
162175 } else if docs. is_none ( ) {
@@ -166,16 +179,20 @@ fn parse_cli() -> Result<Cli, String> {
166179 }
167180 }
168181
169- Ok ( Cli { docs : to_canonical_path ( & docs. ok_or ( "missing first positional argument" ) ?) ? } )
182+ Ok ( Cli {
183+ docs : to_absolute_path ( & docs. ok_or ( "missing first positional argument" ) ?) ?,
184+ link_targets_dirs,
185+ } )
170186}
171187
/// Prints the usage line to stderr and terminates the process with `code`.
fn usage_and_exit(code: i32) -> ! {
    const USAGE: &str = "usage: linkchecker PATH [--link-targets-dir=PATH ...] ";

    eprintln!("{USAGE}");
    std::process::exit(code)
}
176192
177193struct Checker {
178194 root : PathBuf ,
195+ link_targets_dirs : Vec < PathBuf > ,
179196 cache : Cache ,
180197}
181198
@@ -461,37 +478,34 @@ impl Checker {
461478
462479 /// Load a file from disk, or from the cache if available.
463480 fn load_file ( & mut self , file : & Path , report : & mut Report ) -> ( String , & FileEntry ) {
464- // https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
465- #[ cfg( windows) ]
466- const ERROR_INVALID_NAME : i32 = 123 ;
467-
468481 let pretty_path =
469482 file. strip_prefix ( & self . root ) . unwrap_or ( file) . to_str ( ) . unwrap ( ) . to_string ( ) ;
470483
471- let entry =
472- self . cache . entry ( pretty_path. clone ( ) ) . or_insert_with ( || match fs:: metadata ( file) {
484+ for base in once ( & self . root ) . chain ( self . link_targets_dirs . iter ( ) ) {
485+ let entry = self . cache . entry ( pretty_path. clone ( ) ) ;
486+ if let Entry :: Occupied ( e) = & entry
487+ && !matches ! ( e. get( ) , FileEntry :: Missing )
488+ {
489+ break ;
490+ }
491+
492+ let file = base. join ( & pretty_path) ;
493+ entry. insert_entry ( match fs:: metadata ( & file) {
473494 Ok ( metadata) if metadata. is_dir ( ) => FileEntry :: Dir ,
474495 Ok ( _) => {
475496 if file. extension ( ) . and_then ( |s| s. to_str ( ) ) != Some ( "html" ) {
476497 FileEntry :: OtherFile
477498 } else {
478499 report. html_files += 1 ;
479- load_html_file ( file, report)
500+ load_html_file ( & file, report)
480501 }
481502 }
482- Err ( e) if e. kind ( ) == ErrorKind :: NotFound => FileEntry :: Missing ,
483- Err ( e) => {
484- // If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME` rather than `NotFound`.
485- // Explicitly check for that so that the broken link can be allowed in `LINKCHECK_EXCEPTIONS`.
486- #[ cfg( windows) ]
487- if e. raw_os_error ( ) == Some ( ERROR_INVALID_NAME )
488- && file. as_os_str ( ) . to_str ( ) . map_or ( false , |s| s. contains ( "::" ) )
489- {
490- return FileEntry :: Missing ;
491- }
492- panic ! ( "unexpected read error for {}: {}" , file. display( ) , e) ;
493- }
503+ Err ( e) if is_not_found_error ( & file, & e) => FileEntry :: Missing ,
504+ Err ( e) => panic ! ( "unexpected read error for {}: {}" , file. display( ) , e) ,
494505 } ) ;
506+ }
507+
508+ let entry = self . cache . get ( & pretty_path) . unwrap ( ) ;
495509 ( pretty_path, entry)
496510 }
497511}
@@ -670,3 +684,16 @@ fn parse_ids(ids: &mut HashSet<String>, file: &str, source: &str, report: &mut R
670684 ids. insert ( encoded) ;
671685 }
672686}
687+
/// Returns `true` when `error`, produced while accessing `path`, should be
/// treated as "the file does not exist".
///
/// Besides `ErrorKind::NotFound`, this also accepts the Windows
/// `ERROR_INVALID_NAME` OS error when `path` contains `::`.
fn is_not_found_error(path: &Path, error: &std::io::Error) -> bool {
    // https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
    const WINDOWS_ERROR_INVALID_NAME: i32 = 123;

    if error.kind() == std::io::ErrorKind::NotFound {
        return true;
    }

    // If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME`
    // rather than `NotFound`. Explicitly check for that so that the broken link can be allowed
    // in `LINKCHECK_EXCEPTIONS`.
    cfg!(windows)
        && error.raw_os_error() == Some(WINDOWS_ERROR_INVALID_NAME)
        && path.to_str().is_some_and(|s| s.contains("::"))
}
0 commit comments